audio2midi 0.6.0.tar.gz → 0.8.0.tar.gz

This diff shows the changes between two publicly released versions of the package, exactly as they appear in their public registry. It is provided for informational purposes only.
--- PKG-INFO
+++ PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: audio2midi
-Version: 0.6.0
+Version: 0.8.0
 Summary: Audio To Midi
 Author-email: dummyjenil <dummyjenil@gmail.com>
 Provides-Extra: all
@@ -52,6 +52,12 @@ Provides-Extra: librosa-pitch-detector
 Requires-Dist: librosa; extra == 'librosa-pitch-detector'
 Requires-Dist: numpy; extra == 'librosa-pitch-detector'
 Requires-Dist: pretty-midi-fix; extra == 'librosa-pitch-detector'
+Provides-Extra: magenta-music-transcription
+Requires-Dist: huggingface-hub; extra == 'magenta-music-transcription'
+Requires-Dist: librosa; extra == 'magenta-music-transcription'
+Requires-Dist: numpy; extra == 'magenta-music-transcription'
+Requires-Dist: pretty-midi-fix; extra == 'magenta-music-transcription'
+Requires-Dist: tensorflow; extra == 'magenta-music-transcription'
 Provides-Extra: melodia-pitch-detector
 Requires-Dist: huggingface-hub; extra == 'melodia-pitch-detector'
 Requires-Dist: librosa; extra == 'melodia-pitch-detector'
@@ -91,7 +97,7 @@ Description-Content-Type: text/markdown
 ---

 ```bash
-pip install audio2midi[all] audio2midi[pop2piano] audio2midi[violin_pitch_detector] audio2midi[crepe_pitch_detector] audio2midi[crepe_pitch_detector_tf] audio2midi[melodia_pitch_detector] audio2midi[basic_pitch_pitch_detector] audio2midi[librosa_pitch_detector]
+pip install audio2midi[all] audio2midi[pop2piano] audio2midi[violin_pitch_detector] audio2midi[crepe_pitch_detector] audio2midi[crepe_pitch_detector_tf] audio2midi[melodia_pitch_detector] audio2midi[basic_pitch_pitch_detector] audio2midi[librosa_pitch_detector] audio2midi[magenta_music_transcription]
 ```
 ---

@@ -140,6 +146,7 @@ from audio2midi.basic_pitch_pitch_detector import BasicPitch
 from audio2midi.crepe_pitch_detector import Crepe
 from audio2midi.violin_pitch_detector import Violin_Pitch_Det
 from audio2midi.pop2piano import Pop2Piano
+from audio2midi.magenta_music_transcription import Magenta
 from torch import device as Device
 from torch.cuda import is_available as cuda_is_available
 device = Device("cuda" if cuda_is_available() else "cpu")
@@ -147,6 +154,7 @@ Crepe().predict(audio_path)
 Pop2Piano(device=device).predict(audio_path)
 Violin_Pitch_Det(device=device).predict(audio_path)
 BasicPitch(device=device).predict(audio_path)
+Magenta().predict(audio_path)
 ```

 ---
@@ -160,6 +168,7 @@ from audio2midi.melodia_pitch_detector import Melodia
 from audio2midi.pop2piano import Pop2Piano
 from audio2midi.violin_pitch_detector import Violin_Pitch_Det
 from audio2midi.mt3_music_transcription import MT3
+from audio2midi.magenta_music_transcription import Magenta
 from os import environ
 from huggingface_hub import hf_hub_download
 from shutil import unpack_archive
@@ -214,11 +223,12 @@ gr.TabbedInterface([
 gr.Interface(Guitar_Pitch_Det().predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Number(4,label="mag_exp"),gr.Number(-61,label="Threshold"),gr.Number(6,label="Pre_post_max"),gr.Checkbox(False,label="backtrack"),gr.Checkbox(False,label="round_to_sixteenth"),gr.Number(1024,label="hop_length"),gr.Number(72,label="n_bins"),gr.Number(12,label="bins_per_octave")],gr.File(label="Midi File")),
 gr.Interface(Melodia().predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Number(120,label="BPM",step=30),gr.Number(0.25,label="smoothness",step=0.05,info="Smooth the pitch sequence with a median filter of the provided duration (in seconds)."),gr.Number(0.1,label="minimum duration",step=0.1,info="Minimum allowed duration for note (in seconds). Shorter notes will be removed."),gr.Number(128,label="HOP")],gr.File(label="Midi File")),
 gr.Interface(BasicPitch(device=device).predict,[gr.Audio(type="filepath", label="Upload Audio"),gr.Number(0.5,label="onset_thresh",info="Minimum amplitude of an onset activation to be considered an onset."),gr.Number(0.3,label="frame_thresh",info="Minimum energy requirement for a frame to be considered present."),gr.Number(127.70,label="min_note_len",info="The minimum allowed note length in milliseconds."),gr.Number(120,label="midi_tempo"),gr.Checkbox(True,label="infer_onsets",info="add additional onsets when there are large differences in frame amplitudes."),gr.Checkbox(True,label="include_pitch_bends",info="include pitch bends."),gr.Checkbox(False,label="multiple_pitch_bends",info="allow overlapping notes in midi file to have pitch bends."),gr.Checkbox(True,label="melodia_trick",info="Use the melodia post-processing step.")],gr.File(label="Download Midi File")),
+gr.Interface(Magenta().predict,[gr.Audio(type="filepath", label="Upload Audio"),gr.Number(0.5,label="onset_thresh",info="Minimum amplitude of an onset activation to be considered an onset."),gr.Number(3,label="min_note_len",info="The minimum allowed note length"),gr.Number(3,label="gap_tolerance_frames"),gr.Number(4,label="pitch_bend_steps"),gr.Number(1500,label="pitch_bend_depth"),gr.Checkbox(True,label="include_pitch_bends",info="include pitch bends.")],gr.File(label="Download Midi File")),
 gr.Interface(Violin_Pitch_Det(device=device,model_capacity=getenv("violin_model_capacity","full")).predict, [gr.Audio(label="Upload your Audio file",type="filepath"),gr.Number(32,label="Batch size"),gr.Radio(["spotify","tiktok"],value="spotify",label="Post Processing"),gr.Checkbox(True,label="include_pitch_bends")],gr.File(label="Download MIDI file")),
 gr.Interface(Crepe(getenv("crepe_model_capacity","full")).predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Checkbox(False,label="viterbi",info="Apply viterbi smoothing to the estimated pitch curve"),gr.Checkbox(True,label="center"),gr.Number(10,label="step size",info="The step size in milliseconds for running pitch estimation."),gr.Number(0.8,label="minimum confidence"),gr.Number(32,label="batch size")],gr.File(label="Midi File")),
 gr.Interface(CrepeTF(getenv("crepe_model_capacity","full")).predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Checkbox(False,label="viterbi",info="Apply viterbi smoothing to the estimated pitch curve"),gr.Checkbox(True,label="center"),gr.Number(10,label="step size",info="The step size in milliseconds for running pitch estimation."),gr.Number(0.8,label="minimum confidence"),gr.Number(32,label="batch size")],gr.File(label="Midi File")),
 gr.Interface(Pop2Piano(device).predict,[gr.Audio(label="Input Audio",type="filepath"),gr.Number(1, minimum=1, maximum=21, label="Composer"),gr.Number(2,label="Details in Piano"),gr.Number(1,label="Efficiency of Piano"),gr.Radio([1,2,4],label="steps per beat",value=2)],gr.File(label="MIDI File")),
 gr.Interface(MT3(str(Path("mt3_model").absolute())).predict,[gr.Audio(label="Input Audio",type="filepath"),gr.Number(0,label="seed")],gr.File(label="MIDI File")),
 midi_viz_ui
-],["Normal Pitch Detection","Guitar Based Pitch Detection","Melodia","Spotify Pitch Detection","Violin Based Pitch Detection","Crepe Pitch Detection","Crepe Pitch Detection TF","Pop2Piano","MT3","Midi Vizulizer"]).launch()
+],["Normal Pitch Detection","Guitar Based Pitch Detection","Melodia","Spotify Pitch Detection","Magenta Pitch Detection","Violin Based Pitch Detection","Crepe Pitch Detection","Crepe Pitch Detection TF","Pop2Piano","MT3","Midi Vizulizer"]).launch()
 ```
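
The PKG-INFO hunks above register a new `magenta-music-transcription` extra (the pyproject.toml hunk further down defines the same dependency group). For readers who want only the new transcriber rather than the combined command from the README, a minimal install line; the quotes just keep shells such as zsh from globbing the brackets, and pip normalizes the underscore and hyphen spellings to the same extra:

```bash
pip install "audio2midi[magenta_music_transcription]"
```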
--- README.md
+++ README.md
@@ -5,7 +5,7 @@
 ---

 ```bash
-pip install audio2midi[all] audio2midi[pop2piano] audio2midi[violin_pitch_detector] audio2midi[crepe_pitch_detector] audio2midi[crepe_pitch_detector_tf] audio2midi[melodia_pitch_detector] audio2midi[basic_pitch_pitch_detector] audio2midi[librosa_pitch_detector]
+pip install audio2midi[all] audio2midi[pop2piano] audio2midi[violin_pitch_detector] audio2midi[crepe_pitch_detector] audio2midi[crepe_pitch_detector_tf] audio2midi[melodia_pitch_detector] audio2midi[basic_pitch_pitch_detector] audio2midi[librosa_pitch_detector] audio2midi[magenta_music_transcription]
 ```
 ---

@@ -54,6 +54,7 @@ from audio2midi.basic_pitch_pitch_detector import BasicPitch
 from audio2midi.crepe_pitch_detector import Crepe
 from audio2midi.violin_pitch_detector import Violin_Pitch_Det
 from audio2midi.pop2piano import Pop2Piano
+from audio2midi.magenta_music_transcription import Magenta
 from torch import device as Device
 from torch.cuda import is_available as cuda_is_available
 device = Device("cuda" if cuda_is_available() else "cpu")
@@ -61,6 +62,7 @@ Crepe().predict(audio_path)
 Pop2Piano(device=device).predict(audio_path)
 Violin_Pitch_Det(device=device).predict(audio_path)
 BasicPitch(device=device).predict(audio_path)
+Magenta().predict(audio_path)
 ```

 ---
@@ -74,6 +76,7 @@ from audio2midi.melodia_pitch_detector import Melodia
 from audio2midi.pop2piano import Pop2Piano
 from audio2midi.violin_pitch_detector import Violin_Pitch_Det
 from audio2midi.mt3_music_transcription import MT3
+from audio2midi.magenta_music_transcription import Magenta
 from os import environ
 from huggingface_hub import hf_hub_download
 from shutil import unpack_archive
@@ -128,11 +131,12 @@ gr.TabbedInterface([
 gr.Interface(Guitar_Pitch_Det().predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Number(4,label="mag_exp"),gr.Number(-61,label="Threshold"),gr.Number(6,label="Pre_post_max"),gr.Checkbox(False,label="backtrack"),gr.Checkbox(False,label="round_to_sixteenth"),gr.Number(1024,label="hop_length"),gr.Number(72,label="n_bins"),gr.Number(12,label="bins_per_octave")],gr.File(label="Midi File")),
 gr.Interface(Melodia().predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Number(120,label="BPM",step=30),gr.Number(0.25,label="smoothness",step=0.05,info="Smooth the pitch sequence with a median filter of the provided duration (in seconds)."),gr.Number(0.1,label="minimum duration",step=0.1,info="Minimum allowed duration for note (in seconds). Shorter notes will be removed."),gr.Number(128,label="HOP")],gr.File(label="Midi File")),
 gr.Interface(BasicPitch(device=device).predict,[gr.Audio(type="filepath", label="Upload Audio"),gr.Number(0.5,label="onset_thresh",info="Minimum amplitude of an onset activation to be considered an onset."),gr.Number(0.3,label="frame_thresh",info="Minimum energy requirement for a frame to be considered present."),gr.Number(127.70,label="min_note_len",info="The minimum allowed note length in milliseconds."),gr.Number(120,label="midi_tempo"),gr.Checkbox(True,label="infer_onsets",info="add additional onsets when there are large differences in frame amplitudes."),gr.Checkbox(True,label="include_pitch_bends",info="include pitch bends."),gr.Checkbox(False,label="multiple_pitch_bends",info="allow overlapping notes in midi file to have pitch bends."),gr.Checkbox(True,label="melodia_trick",info="Use the melodia post-processing step.")],gr.File(label="Download Midi File")),
+gr.Interface(Magenta().predict,[gr.Audio(type="filepath", label="Upload Audio"),gr.Number(0.5,label="onset_thresh",info="Minimum amplitude of an onset activation to be considered an onset."),gr.Number(3,label="min_note_len",info="The minimum allowed note length"),gr.Number(3,label="gap_tolerance_frames"),gr.Number(4,label="pitch_bend_steps"),gr.Number(1500,label="pitch_bend_depth"),gr.Checkbox(True,label="include_pitch_bends",info="include pitch bends.")],gr.File(label="Download Midi File")),
 gr.Interface(Violin_Pitch_Det(device=device,model_capacity=getenv("violin_model_capacity","full")).predict, [gr.Audio(label="Upload your Audio file",type="filepath"),gr.Number(32,label="Batch size"),gr.Radio(["spotify","tiktok"],value="spotify",label="Post Processing"),gr.Checkbox(True,label="include_pitch_bends")],gr.File(label="Download MIDI file")),
 gr.Interface(Crepe(getenv("crepe_model_capacity","full")).predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Checkbox(False,label="viterbi",info="Apply viterbi smoothing to the estimated pitch curve"),gr.Checkbox(True,label="center"),gr.Number(10,label="step size",info="The step size in milliseconds for running pitch estimation."),gr.Number(0.8,label="minimum confidence"),gr.Number(32,label="batch size")],gr.File(label="Midi File")),
 gr.Interface(CrepeTF(getenv("crepe_model_capacity","full")).predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Checkbox(False,label="viterbi",info="Apply viterbi smoothing to the estimated pitch curve"),gr.Checkbox(True,label="center"),gr.Number(10,label="step size",info="The step size in milliseconds for running pitch estimation."),gr.Number(0.8,label="minimum confidence"),gr.Number(32,label="batch size")],gr.File(label="Midi File")),
 gr.Interface(Pop2Piano(device).predict,[gr.Audio(label="Input Audio",type="filepath"),gr.Number(1, minimum=1, maximum=21, label="Composer"),gr.Number(2,label="Details in Piano"),gr.Number(1,label="Efficiency of Piano"),gr.Radio([1,2,4],label="steps per beat",value=2)],gr.File(label="MIDI File")),
 gr.Interface(MT3(str(Path("mt3_model").absolute())).predict,[gr.Audio(label="Input Audio",type="filepath"),gr.Number(0,label="seed")],gr.File(label="MIDI File")),
 midi_viz_ui
-],["Normal Pitch Detection","Guitar Based Pitch Detection","Melodia","Spotify Pitch Detection","Violin Based Pitch Detection","Crepe Pitch Detection","Crepe Pitch Detection TF","Pop2Piano","MT3","Midi Vizulizer"]).launch()
+],["Normal Pitch Detection","Guitar Based Pitch Detection","Melodia","Spotify Pitch Detection","Magenta Pitch Detection","Violin Based Pitch Detection","Crepe Pitch Detection","Crepe Pitch Detection TF","Pop2Piano","MT3","Midi Vizulizer"]).launch()
 ```
--- pyproject.toml
+++ pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "audio2midi"
-version = "0.6.0"
+version = "0.8.0"
 description = "Audio To Midi"
 readme = "README.md"
 authors = [
@@ -18,6 +18,7 @@ crepe_pitch_detector = ["librosa", "numpy","pretty_midi_fix","hmmlearn","tensorf
 violin_pitch_detector = ["librosa", "numpy","pretty_midi_fix","scipy","torchaudio","torch","mir_eval","huggingface_hub"]
 pop2piano = ["librosa", "numpy==1.26.4","pretty_midi_fix","transformers","essentia","torch","scipy","resampy","pretty_midi","huggingface_hub"]
 mt3_music_transcription = ["nest_asyncio", "mt3-audio2midi"]
+magenta_music_transcription = ["librosa","numpy","pretty_midi_fix","tensorflow","huggingface_hub"]
 all = ["librosa", "numpy==1.26.4","pretty_midi_fix","transformers","essentia","torch","scipy","torchaudio","torch","mir_eval","hmmlearn","tensorflow","keras","vamp","nnAudio","resampy","pretty_midi","huggingface_hub","nest_asyncio", "mt3-audio2midi"]

 [build-system]
--- /dev/null
+++ audio2midi/magenta_music_transcription.py
@@ -0,0 +1,76 @@
+from typing import Callable
+import numpy as np
+import pretty_midi_fix
+import tensorflow
+import librosa.feature.rhythm
+from huggingface_hub import hf_hub_download
+import librosa
+
+
+def endpitch(pitch, endFrame,volProb,intervals,MIN_NOTE_FRAMES,frameLenSecs,PITCH_BEND_ENABLED,pitch_bend_steps,pitch_bend_depth,track):
+    startFrame = intervals[pitch]
+    if endFrame - startFrame < MIN_NOTE_FRAMES:
+        return
+    vol = volProb[startFrame, pitch]
+    if vol < 0 or vol > 1:
+        return
+    start_time = startFrame * frameLenSecs
+    track.notes.append(pretty_midi_fix.Note(velocity=int(max(0, min(1, vol)) * 80 + 10), pitch=pitch + 21, start=start_time, end=endFrame * frameLenSecs))
+    if PITCH_BEND_ENABLED:
+        for step in range(pitch_bend_steps):
+            track.pitch_bends.append(pretty_midi_fix.PitchBend(pitch=int(np.sin(np.pi * step / (pitch_bend_steps - 1)) * pitch_bend_depth), time=start_time + step * 0.01))
+        track.pitch_bends.append(pretty_midi_fix.PitchBend(pitch=0, time=start_time + 0.05)) # Reset
+    del intervals[pitch]
+
+def model_output_to_notes(model_output,onset_thresh,include_pitch_bends,min_note_len,gap_tolerance_frames,pitch_bend_depth,pitch_bend_steps):
+    actProb , onProb , offProb , volProb , tempo = model_output
+    midi = pretty_midi_fix.PrettyMIDI(initial_tempo=tempo)
+    track = pretty_midi_fix.Instrument(program=40)
+    frameLenSecs = librosa.frames_to_time(1, sr=16000)
+    intervals = {}
+    onsets = (onProb > onset_thresh).astype(np.int8)
+    frames = onsets | (actProb > onset_thresh).astype(np.int8)
+    for i, frame in enumerate(np.vstack([frames, np.zeros(frames.shape[1])])):
+        for pitch, active in enumerate(frame):
+            if active:
+                if pitch not in intervals:
+                    if onsets is None or onsets[i, pitch]:
+                        intervals[pitch] = i
+                elif onsets is not None and onsets[i, pitch] and (i - intervals[pitch] > 2):
+                    endpitch(pitch, i,volProb,intervals,min_note_len,frameLenSecs,include_pitch_bends,pitch_bend_steps,pitch_bend_depth,track)
+                    intervals[pitch] = i
+            elif pitch in intervals:
+                if i + gap_tolerance_frames < frames.shape[0] and np.any(frames[i:i + gap_tolerance_frames, pitch]):
+                    continue # Don't end the note yet
+                endpitch(pitch, i,volProb,intervals,min_note_len,frameLenSecs,include_pitch_bends,pitch_bend_steps,pitch_bend_depth,track)
+    midi.instruments.append(track)
+    return midi
+
+class Magenta:
+    def __init__(self,model_path=hf_hub_download("shethjenil/Audio2Midi_Models","magenta.tflite")):
+        self.interp = tensorflow.lite.Interpreter(model_path=model_path)
+        self.interp.allocate_tensors()
+        self.inputLen = self.interp.get_input_details()[0]['shape'][0]
+        self.outputStep = self.interp.get_output_details()[0]['shape'][1] * 512
+
+    def run_inference(self,audio_path,progress_callback):
+        song = librosa.load(audio_path,sr=16000)[0]
+        actProb, onProb, offProb, volProb = np.empty((1, 88)), np.empty((1, 88)), np.empty((1, 88)), np.empty((1, 88))
+        paddedSong = np.append(song, np.zeros(-(song.size - self.inputLen) % self.outputStep, dtype=np.float32))
+        total_size = (paddedSong.size - self.inputLen) // self.outputStep + 1
+        tempo = librosa.feature.rhythm.tempo(y=song, sr=16000).mean()
+        for i in range(total_size):
+            self.interp.set_tensor(self.interp.get_input_details()[0]['index'], paddedSong[i * self.outputStep : i * self.outputStep + self.inputLen])
+            self.interp.invoke()
+            actProb = np.vstack((actProb, self.interp.get_tensor(self.interp.get_output_details()[0]['index'])[0]))
+            onProb = np.vstack(( onProb, self.interp.get_tensor(self.interp.get_output_details()[1]['index'])[0]))
+            offProb = np.vstack((offProb, self.interp.get_tensor(self.interp.get_output_details()[2]['index'])[0]))
+            volProb = np.vstack((volProb, self.interp.get_tensor(self.interp.get_output_details()[3]['index'])[0]))
+            if progress_callback:
+                progress_callback(i,total_size)
+        return actProb , onProb , offProb , volProb , tempo
+
+
+    def predict(self,audio,onset_thresh=0,min_note_len=3,gap_tolerance_frames = 3,pitch_bend_depth = 1500,pitch_bend_steps = 4,include_pitch_bends=True,progress_callback: Callable[[int, int], None] = None,output_file="output.mid"):
+        model_output_to_notes(self.run_inference(audio,progress_callback),onset_thresh = onset_thresh,min_note_len = min_note_len,include_pitch_bends = include_pitch_bends,pitch_bend_depth=pitch_bend_depth,pitch_bend_steps=pitch_bend_steps,gap_tolerance_frames=gap_tolerance_frames).write(output_file)
+        return output_file
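
Read together, the README and module hunks above are enough to sketch how the new `Magenta` class is used on its own. The constructor, the `predict()` signature, and its defaults are quoted from the new file; the audio file name, output name, and progress callback below are illustrative only:

```python
from audio2midi.magenta_music_transcription import Magenta

# The constructor's default model_path downloads magenta.tflite from the
# shethjenil/Audio2Midi_Models repo on the Hugging Face Hub on first use.
transcriber = Magenta()

def report(done: int, total: int) -> None:
    # Called once per inference chunk: run_inference() pads the 16 kHz audio,
    # then slides a window of inputLen samples forward by outputStep samples.
    print(f"chunk {done + 1} of {total}")

midi_path = transcriber.predict(
    "song.wav",               # placeholder path; librosa resamples it to 16 kHz
    onset_thresh=0.5,         # predict() defaults to 0; 0.5 matches the Gradio demo
    min_note_len=3,           # minimum note length, counted in model frames
    gap_tolerance_frames=3,   # brief dropouts are bridged instead of ending the note
    include_pitch_bends=True, # adds the sine-shaped bend ramp from endpitch()
    progress_callback=report,
    output_file="song.mid",
)
print(midi_path)  # predict() writes the MIDI file and returns output_file
```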