audio2midi 0.5.0.tar.gz → 0.7.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {audio2midi-0.5.0 → audio2midi-0.7.0}/PKG-INFO +7 -1
- {audio2midi-0.5.0 → audio2midi-0.7.0}/pyproject.toml +2 -1
- {audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/librosa_pitch_detector.py +1 -1
- audio2midi-0.7.0/src/audio2midi/magenta_music_transcription.py +76 -0
- {audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/melodia_pitch_detector.py +5 -4
- {audio2midi-0.5.0 → audio2midi-0.7.0}/.gitignore +0 -0
- {audio2midi-0.5.0 → audio2midi-0.7.0}/.python-version +0 -0
- {audio2midi-0.5.0 → audio2midi-0.7.0}/README.md +0 -0
- {audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/__init__.py +0 -0
- {audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/basic_pitch_pitch_detector.py +0 -0
- {audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/crepe_pitch_detector.py +0 -0
- {audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/crepe_pitch_detector_tf.py +0 -0
- {audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/mt3_music_transcription.py +0 -0
- {audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/pop2piano.py +0 -0
- {audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/py.typed +0 -0
- {audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/violin_pitch_detector.py +0 -0
{audio2midi-0.5.0 → audio2midi-0.7.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: audio2midi
-Version: 0.5.0
+Version: 0.7.0
 Summary: Audio To Midi
 Author-email: dummyjenil <dummyjenil@gmail.com>
 Provides-Extra: all
@@ -52,6 +52,12 @@ Provides-Extra: librosa-pitch-detector
 Requires-Dist: librosa; extra == 'librosa-pitch-detector'
 Requires-Dist: numpy; extra == 'librosa-pitch-detector'
 Requires-Dist: pretty-midi-fix; extra == 'librosa-pitch-detector'
+Provides-Extra: magenta-music-transcription
+Requires-Dist: huggingface-hub; extra == 'magenta-music-transcription'
+Requires-Dist: librosa; extra == 'magenta-music-transcription'
+Requires-Dist: numpy; extra == 'magenta-music-transcription'
+Requires-Dist: pretty-midi-fix; extra == 'magenta-music-transcription'
+Requires-Dist: tensorflow; extra == 'magenta-music-transcription'
 Provides-Extra: melodia-pitch-detector
 Requires-Dist: huggingface-hub; extra == 'melodia-pitch-detector'
 Requires-Dist: librosa; extra == 'melodia-pitch-detector'
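The practical upshot of the new metadata is that the Magenta transcriber's dependencies can be pulled in on demand. A minimal install sketch (standard pip extras syntax; pip normalizes the hyphenated and underscored spellings of the extra to the same name):

    pip install "audio2midi[magenta-music-transcription]"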
{audio2midi-0.5.0 → audio2midi-0.7.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "audio2midi"
-version = "0.5.0"
+version = "0.7.0"
 description = "Audio To Midi"
 readme = "README.md"
 authors = [
@@ -18,6 +18,7 @@ crepe_pitch_detector = ["librosa", "numpy","pretty_midi_fix","hmmlearn","tensorf
 violin_pitch_detector = ["librosa", "numpy","pretty_midi_fix","scipy","torchaudio","torch","mir_eval","huggingface_hub"]
 pop2piano = ["librosa", "numpy==1.26.4","pretty_midi_fix","transformers","essentia","torch","scipy","resampy","pretty_midi","huggingface_hub"]
 mt3_music_transcription = ["nest_asyncio", "mt3-audio2midi"]
+magenta_music_transcription = ["librosa","numpy","pretty_midi_fix","tensorflow","huggingface_hub"]
 all = ["librosa", "numpy==1.26.4","pretty_midi_fix","transformers","essentia","torch","scipy","torchaudio","torch","mir_eval","hmmlearn","tensorflow","keras","vamp","nnAudio","resampy","pretty_midi","huggingface_hub","nest_asyncio", "mt3-audio2midi"]
 
 [build-system]
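Downstream projects can depend on the same optional group; a minimal sketch of a consuming project's own pyproject.toml (the project name and version here are placeholders):

    [project]
    name = "demo-transcriber"  # placeholder
    version = "0.1.0"          # placeholder
    dependencies = ["audio2midi[magenta-music-transcription]"]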
{audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/librosa_pitch_detector.py

@@ -147,6 +147,6 @@ class Guitar_Pitch_Det:
                 # If it's a rest, just advance the time
                 duration_sec = duration * (60 / tempo_bpm)
                 note_time += duration_sec
-        pm.instruments.append(
+        pm.instruments.append(instrument)
         pm.write(output_file)
         return output_file
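In 0.7.0 the method appends the instrument object it built and then writes the MIDI file. A minimal usage sketch; the predict method name and its argument are assumptions, since only the tail of the method is visible in this hunk:

    from audio2midi.librosa_pitch_detector import Guitar_Pitch_Det

    # Hypothetical entry point: the hunk above shows only the end of the method.
    midi_path = Guitar_Pitch_Det().predict("guitar.wav")  # assumed signature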
audio2midi-0.7.0/src/audio2midi/magenta_music_transcription.py

@@ -0,0 +1,76 @@
+from typing import Callable
+import numpy as np
+import pretty_midi_fix
+import tensorflow
+import librosa.feature.rhythm
+from huggingface_hub import hf_hub_download
+import librosa
+
+
+def endpitch(pitch, endFrame,volProb,intervals,MIN_NOTE_FRAMES,frameLenSecs,PITCH_BEND_ENABLED,pitch_bend_steps,pitch_bend_depth,track):
+    startFrame = intervals[pitch]
+    if endFrame - startFrame < MIN_NOTE_FRAMES:
+        return
+    vol = volProb[startFrame, pitch]
+    if vol < 0 or vol > 1:
+        return
+    start_time = startFrame * frameLenSecs
+    track.notes.append(pretty_midi_fix.Note(velocity=int(max(0, min(1, vol)) * 80 + 10), pitch=pitch + 21, start=start_time, end=endFrame * frameLenSecs))
+    if PITCH_BEND_ENABLED:
+        for step in range(pitch_bend_steps):
+            track.pitch_bends.append(pretty_midi_fix.PitchBend(pitch=int(np.sin(np.pi * step / (pitch_bend_steps - 1)) * pitch_bend_depth), time=start_time + step * 0.01))
+        track.pitch_bends.append(pretty_midi_fix.PitchBend(pitch=0, time=start_time + 0.05)) # Reset
+    del intervals[pitch]
+
+def model_output_to_notes(model_output,onset_thresh,include_pitch_bends,min_note_len,gap_tolerance_frames,pitch_bend_depth,pitch_bend_steps):
+    actProb , onProb , offProb , volProb , tempo = model_output
+    midi = pretty_midi_fix.PrettyMIDI(initial_tempo=tempo)
+    track = pretty_midi_fix.Instrument(program=40)
+    frameLenSecs = librosa.frames_to_time(1, sr=16000)
+    intervals = {}
+    onsets = (onProb > onset_thresh).astype(np.int8)
+    frames = onsets | (actProb > onset_thresh).astype(np.int8)
+    for i, frame in enumerate(np.vstack([frames, np.zeros(frames.shape[1])])):
+        for pitch, active in enumerate(frame):
+            if active:
+                if pitch not in intervals:
+                    if onsets is None or onsets[i, pitch]:
+                        intervals[pitch] = i
+                elif onsets is not None and onsets[i, pitch] and (i - intervals[pitch] > 2):
+                    endpitch(pitch, i,volProb,intervals,min_note_len,frameLenSecs,include_pitch_bends,pitch_bend_steps,pitch_bend_depth,track)
+                    intervals[pitch] = i
+            elif pitch in intervals:
+                if i + gap_tolerance_frames < frames.shape[0] and np.any(frames[i:i + gap_tolerance_frames, pitch]):
+                    continue # Don't end the note yet
+                endpitch(pitch, i,volProb,intervals,min_note_len,frameLenSecs,include_pitch_bends,pitch_bend_steps,pitch_bend_depth,track)
+    midi.instruments.append(track)
+    return midi
+
+class Magenta:
+    def __init__(self,model_path=hf_hub_download("shethjenil/Audio2Midi_Models","magenta.tflite")):
+        self.interp = tensorflow.lite.Interpreter(model_path=model_path)
+        self.interp.allocate_tensors()
+        self.inputLen = self.interp.get_input_details()[0]['shape'][0]
+        self.outputStep = self.interp.get_output_details()[0]['shape'][1] * 512
+
+    def run_inference(self,audio_path,progress_callback):
+        song = librosa.load(audio_path,sr=16000)[0]
+        actProb, onProb, offProb, volProb = np.empty((1, 88)), np.empty((1, 88)), np.empty((1, 88)), np.empty((1, 88))
+        paddedSong = np.append(song, np.zeros(-(song.size - self.inputLen) % self.outputStep, dtype=np.float32))
+        total_size = (paddedSong.size - self.inputLen) // self.outputStep + 1
+        tempo = librosa.feature.rhythm.tempo(y=song, sr=16000).mean()
+        for i in range(total_size):
+            self.interp.set_tensor(self.interp.get_input_details()[0]['index'], paddedSong[i * self.outputStep : i * self.outputStep + self.inputLen])
+            self.interp.invoke()
+            actProb = np.vstack((actProb, self.interp.get_tensor(self.interp.get_output_details()[0]['index'])[0]))
+            onProb = np.vstack(( onProb, self.interp.get_tensor(self.interp.get_output_details()[1]['index'])[0]))
+            offProb = np.vstack((offProb, self.interp.get_tensor(self.interp.get_output_details()[2]['index'])[0]))
+            volProb = np.vstack((volProb, self.interp.get_tensor(self.interp.get_output_details()[3]['index'])[0]))
+            if progress_callback:
+                progress_callback(i,total_size)
+        return actProb , onProb , offProb , volProb , tempo
+
+
+    def predict(self,audio,onset_thresh=0,min_note_len=3,include_pitch_bends=True,gap_tolerance_frames = 3,pitch_bend_depth = 1500,pitch_bend_steps = 4,progress_callback: Callable[[int, int], None] = None,output_file="output.mid"):
+        model_output_to_notes(self.run_inference(audio,progress_callback),onset_thresh = onset_thresh,min_note_len = min_note_len,include_pitch_bends = include_pitch_bends,pitch_bend_depth=pitch_bend_depth,pitch_bend_steps=pitch_bend_steps,gap_tolerance_frames=gap_tolerance_frames).write(output_file)
        return output_file
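Taken together, the new module loads the TFLite model once in __init__, slides a fixed-length window over the 16 kHz audio in run_inference, and converts the stacked activation/onset/offset/volume matrices into notes in predict. A minimal usage sketch; the input filename, threshold value, and callback body are illustrative placeholders:

    from audio2midi.magenta_music_transcription import Magenta

    transcriber = Magenta()  # default model_path downloads magenta.tflite from the Hugging Face Hub
    midi_path = transcriber.predict(
        "song.wav",                                # placeholder input file
        onset_thresh=0.5,                          # illustrative; the module's default is 0
        progress_callback=lambda done, total: print(done, "/", total),
    )
    print(midi_path)  # -> "output.mid"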
{audio2midi-0.5.0 → audio2midi-0.7.0}/src/audio2midi/melodia_pitch_detector.py

@@ -49,10 +49,11 @@ class Melodia():
         data, sr = librosa_load(audio, sr=44100, mono=True)
         pm = PrettyMIDI(initial_tempo=tempo)
         instrument = Instrument(program=40)
-
-
-        start =
-
+        for onset_sec, duration_sec, pitch in self.midi_to_notes(
+                self.hz2midi(np.insert(vamp_collect(data, sr, "mtg-melodia:melodia", parameters={"voicing": 0.2})['vector'][1],0, [0]*8)), 44100, smooth, minduration, hop):
+            start = onset_sec
+            end = start + duration_sec
+            instrument.notes.append(Note(100, int(pitch), start, end))
         pm.instruments.append(instrument)
         pm.write(output_file)
         return output_file
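For the Melodia detector, 0.7.0 rewrites the note loop so each (onset_sec, duration_sec, pitch) tuple from midi_to_notes becomes one fixed-velocity Note. A minimal usage sketch; the predict method name and its signature are assumptions, since the hunk shows only the method body, and the mtg-melodia vamp plugin must be installed for it to run:

    from audio2midi.melodia_pitch_detector import Melodia

    # Hypothetical entry point: only the body of the enclosing method is visible above.
    midi_path = Melodia().predict("vocals.wav")  # assumed signature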