audio2midi 0.6.0.tar.gz → 0.8.0.tar.gz

This diff shows the changes between two publicly released versions of the package, exactly as they appear in their public registry. It is provided for informational purposes only.
--- PKG-INFO
+++ PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: audio2midi
-Version: 0.6.0
+Version: 0.8.0
 Summary: Audio To Midi
 Author-email: dummyjenil <dummyjenil@gmail.com>
 Provides-Extra: all
@@ -52,6 +52,12 @@ Provides-Extra: librosa-pitch-detector
 Requires-Dist: librosa; extra == 'librosa-pitch-detector'
 Requires-Dist: numpy; extra == 'librosa-pitch-detector'
 Requires-Dist: pretty-midi-fix; extra == 'librosa-pitch-detector'
+Provides-Extra: magenta-music-transcription
+Requires-Dist: huggingface-hub; extra == 'magenta-music-transcription'
+Requires-Dist: librosa; extra == 'magenta-music-transcription'
+Requires-Dist: numpy; extra == 'magenta-music-transcription'
+Requires-Dist: pretty-midi-fix; extra == 'magenta-music-transcription'
+Requires-Dist: tensorflow; extra == 'magenta-music-transcription'
 Provides-Extra: melodia-pitch-detector
 Requires-Dist: huggingface-hub; extra == 'melodia-pitch-detector'
 Requires-Dist: librosa; extra == 'melodia-pitch-detector'
@@ -91,7 +97,7 @@ Description-Content-Type: text/markdown
 ---

 ```bash
-pip install audio2midi[all] audio2midi[pop2piano] audio2midi[violin_pitch_detector] audio2midi[crepe_pitch_detector] audio2midi[crepe_pitch_detector_tf] audio2midi[melodia_pitch_detector] audio2midi[basic_pitch_pitch_detector] audio2midi[librosa_pitch_detector]
+pip install audio2midi[all] audio2midi[pop2piano] audio2midi[violin_pitch_detector] audio2midi[crepe_pitch_detector] audio2midi[crepe_pitch_detector_tf] audio2midi[melodia_pitch_detector] audio2midi[basic_pitch_pitch_detector] audio2midi[librosa_pitch_detector] audio2midi[magenta_music_transcription]
 ```
 ---

@@ -140,6 +146,7 @@ from audio2midi.basic_pitch_pitch_detector import BasicPitch
 from audio2midi.crepe_pitch_detector import Crepe
 from audio2midi.violin_pitch_detector import Violin_Pitch_Det
 from audio2midi.pop2piano import Pop2Piano
+from audio2midi.magenta_music_transcription import Magenta
 from torch import device as Device
 from torch.cuda import is_available as cuda_is_available
 device = Device("cuda" if cuda_is_available() else "cpu")
@@ -147,6 +154,7 @@ Crepe().predict(audio_path)
 Pop2Piano(device=device).predict(audio_path)
 Violin_Pitch_Det(device=device).predict(audio_path)
 BasicPitch(device=device).predict(audio_path)
+Magenta().predict(audio_path)
 ```

 ---
@@ -160,6 +168,7 @@ from audio2midi.melodia_pitch_detector import Melodia
 from audio2midi.pop2piano import Pop2Piano
 from audio2midi.violin_pitch_detector import Violin_Pitch_Det
 from audio2midi.mt3_music_transcription import MT3
+from audio2midi.magenta_music_transcription import Magenta
 from os import environ
 from huggingface_hub import hf_hub_download
 from shutil import unpack_archive
@@ -214,11 +223,12 @@ gr.TabbedInterface([
 gr.Interface(Guitar_Pitch_Det().predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Number(4,label="mag_exp"),gr.Number(-61,label="Threshold"),gr.Number(6,label="Pre_post_max"),gr.Checkbox(False,label="backtrack"),gr.Checkbox(False,label="round_to_sixteenth"),gr.Number(1024,label="hop_length"),gr.Number(72,label="n_bins"),gr.Number(12,label="bins_per_octave")],gr.File(label="Midi File")),
 gr.Interface(Melodia().predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Number(120,label="BPM",step=30),gr.Number(0.25,label="smoothness",step=0.05,info="Smooth the pitch sequence with a median filter of the provided duration (in seconds)."),gr.Number(0.1,label="minimum duration",step=0.1,info="Minimum allowed duration for note (in seconds). Shorter notes will be removed."),gr.Number(128,label="HOP")],gr.File(label="Midi File")),
 gr.Interface(BasicPitch(device=device).predict,[gr.Audio(type="filepath", label="Upload Audio"),gr.Number(0.5,label="onset_thresh",info="Minimum amplitude of an onset activation to be considered an onset."),gr.Number(0.3,label="frame_thresh",info="Minimum energy requirement for a frame to be considered present."),gr.Number(127.70,label="min_note_len",info="The minimum allowed note length in milliseconds."),gr.Number(120,label="midi_tempo"),gr.Checkbox(True,label="infer_onsets",info="add additional onsets when there are large differences in frame amplitudes."),gr.Checkbox(True,label="include_pitch_bends",info="include pitch bends."),gr.Checkbox(False,label="multiple_pitch_bends",info="allow overlapping notes in midi file to have pitch bends."),gr.Checkbox(True,label="melodia_trick",info="Use the melodia post-processing step.")],gr.File(label="Download Midi File")),
+gr.Interface(Magenta().predict,[gr.Audio(type="filepath", label="Upload Audio"),gr.Number(0.5,label="onset_thresh",info="Minimum amplitude of an onset activation to be considered an onset."),gr.Number(3,label="min_note_len",info="The minimum allowed note length"),gr.Number(3,label="gap_tolerance_frames"),gr.Number(4,label="pitch_bend_steps"),gr.Number(1500,label="pitch_bend_depth"),gr.Checkbox(True,label="include_pitch_bends",info="include pitch bends.")],gr.File(label="Download Midi File")),
 gr.Interface(Violin_Pitch_Det(device=device,model_capacity=getenv("violin_model_capacity","full")).predict, [gr.Audio(label="Upload your Audio file",type="filepath"),gr.Number(32,label="Batch size"),gr.Radio(["spotify","tiktok"],value="spotify",label="Post Processing"),gr.Checkbox(True,label="include_pitch_bends")],gr.File(label="Download MIDI file")),
 gr.Interface(Crepe(getenv("crepe_model_capacity","full")).predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Checkbox(False,label="viterbi",info="Apply viterbi smoothing to the estimated pitch curve"),gr.Checkbox(True,label="center"),gr.Number(10,label="step size",info="The step size in milliseconds for running pitch estimation."),gr.Number(0.8,label="minimum confidence"),gr.Number(32,label="batch size")],gr.File(label="Midi File")),
 gr.Interface(CrepeTF(getenv("crepe_model_capacity","full")).predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Checkbox(False,label="viterbi",info="Apply viterbi smoothing to the estimated pitch curve"),gr.Checkbox(True,label="center"),gr.Number(10,label="step size",info="The step size in milliseconds for running pitch estimation."),gr.Number(0.8,label="minimum confidence"),gr.Number(32,label="batch size")],gr.File(label="Midi File")),
 gr.Interface(Pop2Piano(device).predict,[gr.Audio(label="Input Audio",type="filepath"),gr.Number(1, minimum=1, maximum=21, label="Composer"),gr.Number(2,label="Details in Piano"),gr.Number(1,label="Efficiency of Piano"),gr.Radio([1,2,4],label="steps per beat",value=2)],gr.File(label="MIDI File")),
 gr.Interface(MT3(str(Path("mt3_model").absolute())).predict,[gr.Audio(label="Input Audio",type="filepath"),gr.Number(0,label="seed")],gr.File(label="MIDI File")),
 midi_viz_ui
-],["Normal Pitch Detection","Guitar Based Pitch Detection","Melodia","Spotify Pitch Detection","Violin Based Pitch Detection","Crepe Pitch Detection","Crepe Pitch Detection TF","Pop2Piano","MT3","Midi Vizulizer"]).launch()
+],["Normal Pitch Detection","Guitar Based Pitch Detection","Melodia","Spotify Pitch Detection","Magenta Pitch Detection","Violin Based Pitch Detection","Crepe Pitch Detection","Crepe Pitch Detection TF","Pop2Piano","MT3","Midi Vizulizer"]).launch()
 ```
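
The PKG-INFO hunks above register a new `magenta-music-transcription` extra (the pyproject.toml hunk further down defines the same dependency group). For readers who want only the new transcriber rather than the combined command from the README, a minimal install line; the quotes just keep shells such as zsh from globbing the brackets, and pip normalizes the underscore and hyphen spellings to the same extra:

```bash
pip install "audio2midi[magenta_music_transcription]"
```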
--- README.md
+++ README.md
@@ -5,7 +5,7 @@
 ---

 ```bash
-pip install audio2midi[all] audio2midi[pop2piano] audio2midi[violin_pitch_detector] audio2midi[crepe_pitch_detector] audio2midi[crepe_pitch_detector_tf] audio2midi[melodia_pitch_detector] audio2midi[basic_pitch_pitch_detector] audio2midi[librosa_pitch_detector]
+pip install audio2midi[all] audio2midi[pop2piano] audio2midi[violin_pitch_detector] audio2midi[crepe_pitch_detector] audio2midi[crepe_pitch_detector_tf] audio2midi[melodia_pitch_detector] audio2midi[basic_pitch_pitch_detector] audio2midi[librosa_pitch_detector] audio2midi[magenta_music_transcription]
 ```
 ---

@@ -54,6 +54,7 @@ from audio2midi.basic_pitch_pitch_detector import BasicPitch
 from audio2midi.crepe_pitch_detector import Crepe
 from audio2midi.violin_pitch_detector import Violin_Pitch_Det
 from audio2midi.pop2piano import Pop2Piano
+from audio2midi.magenta_music_transcription import Magenta
 from torch import device as Device
 from torch.cuda import is_available as cuda_is_available
 device = Device("cuda" if cuda_is_available() else "cpu")
@@ -61,6 +62,7 @@ Crepe().predict(audio_path)
 Pop2Piano(device=device).predict(audio_path)
 Violin_Pitch_Det(device=device).predict(audio_path)
 BasicPitch(device=device).predict(audio_path)
+Magenta().predict(audio_path)
 ```

 ---
@@ -74,6 +76,7 @@ from audio2midi.melodia_pitch_detector import Melodia
 from audio2midi.pop2piano import Pop2Piano
 from audio2midi.violin_pitch_detector import Violin_Pitch_Det
 from audio2midi.mt3_music_transcription import MT3
+from audio2midi.magenta_music_transcription import Magenta
 from os import environ
 from huggingface_hub import hf_hub_download
 from shutil import unpack_archive
@@ -128,11 +131,12 @@ gr.TabbedInterface([
 gr.Interface(Guitar_Pitch_Det().predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Number(4,label="mag_exp"),gr.Number(-61,label="Threshold"),gr.Number(6,label="Pre_post_max"),gr.Checkbox(False,label="backtrack"),gr.Checkbox(False,label="round_to_sixteenth"),gr.Number(1024,label="hop_length"),gr.Number(72,label="n_bins"),gr.Number(12,label="bins_per_octave")],gr.File(label="Midi File")),
 gr.Interface(Melodia().predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Number(120,label="BPM",step=30),gr.Number(0.25,label="smoothness",step=0.05,info="Smooth the pitch sequence with a median filter of the provided duration (in seconds)."),gr.Number(0.1,label="minimum duration",step=0.1,info="Minimum allowed duration for note (in seconds). Shorter notes will be removed."),gr.Number(128,label="HOP")],gr.File(label="Midi File")),
 gr.Interface(BasicPitch(device=device).predict,[gr.Audio(type="filepath", label="Upload Audio"),gr.Number(0.5,label="onset_thresh",info="Minimum amplitude of an onset activation to be considered an onset."),gr.Number(0.3,label="frame_thresh",info="Minimum energy requirement for a frame to be considered present."),gr.Number(127.70,label="min_note_len",info="The minimum allowed note length in milliseconds."),gr.Number(120,label="midi_tempo"),gr.Checkbox(True,label="infer_onsets",info="add additional onsets when there are large differences in frame amplitudes."),gr.Checkbox(True,label="include_pitch_bends",info="include pitch bends."),gr.Checkbox(False,label="multiple_pitch_bends",info="allow overlapping notes in midi file to have pitch bends."),gr.Checkbox(True,label="melodia_trick",info="Use the melodia post-processing step.")],gr.File(label="Download Midi File")),
+gr.Interface(Magenta().predict,[gr.Audio(type="filepath", label="Upload Audio"),gr.Number(0.5,label="onset_thresh",info="Minimum amplitude of an onset activation to be considered an onset."),gr.Number(3,label="min_note_len",info="The minimum allowed note length"),gr.Number(3,label="gap_tolerance_frames"),gr.Number(4,label="pitch_bend_steps"),gr.Number(1500,label="pitch_bend_depth"),gr.Checkbox(True,label="include_pitch_bends",info="include pitch bends.")],gr.File(label="Download Midi File")),
 gr.Interface(Violin_Pitch_Det(device=device,model_capacity=getenv("violin_model_capacity","full")).predict, [gr.Audio(label="Upload your Audio file",type="filepath"),gr.Number(32,label="Batch size"),gr.Radio(["spotify","tiktok"],value="spotify",label="Post Processing"),gr.Checkbox(True,label="include_pitch_bends")],gr.File(label="Download MIDI file")),
 gr.Interface(Crepe(getenv("crepe_model_capacity","full")).predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Checkbox(False,label="viterbi",info="Apply viterbi smoothing to the estimated pitch curve"),gr.Checkbox(True,label="center"),gr.Number(10,label="step size",info="The step size in milliseconds for running pitch estimation."),gr.Number(0.8,label="minimum confidence"),gr.Number(32,label="batch size")],gr.File(label="Midi File")),
 gr.Interface(CrepeTF(getenv("crepe_model_capacity","full")).predict,[gr.Audio(type="filepath",label="Input Audio"),gr.Checkbox(False,label="viterbi",info="Apply viterbi smoothing to the estimated pitch curve"),gr.Checkbox(True,label="center"),gr.Number(10,label="step size",info="The step size in milliseconds for running pitch estimation."),gr.Number(0.8,label="minimum confidence"),gr.Number(32,label="batch size")],gr.File(label="Midi File")),
 gr.Interface(Pop2Piano(device).predict,[gr.Audio(label="Input Audio",type="filepath"),gr.Number(1, minimum=1, maximum=21, label="Composer"),gr.Number(2,label="Details in Piano"),gr.Number(1,label="Efficiency of Piano"),gr.Radio([1,2,4],label="steps per beat",value=2)],gr.File(label="MIDI File")),
 gr.Interface(MT3(str(Path("mt3_model").absolute())).predict,[gr.Audio(label="Input Audio",type="filepath"),gr.Number(0,label="seed")],gr.File(label="MIDI File")),
 midi_viz_ui
-],["Normal Pitch Detection","Guitar Based Pitch Detection","Melodia","Spotify Pitch Detection","Violin Based Pitch Detection","Crepe Pitch Detection","Crepe Pitch Detection TF","Pop2Piano","MT3","Midi Vizulizer"]).launch()
+],["Normal Pitch Detection","Guitar Based Pitch Detection","Melodia","Spotify Pitch Detection","Magenta Pitch Detection","Violin Based Pitch Detection","Crepe Pitch Detection","Crepe Pitch Detection TF","Pop2Piano","MT3","Midi Vizulizer"]).launch()
 ```
--- pyproject.toml
+++ pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "audio2midi"
-version = "0.6.0"
+version = "0.8.0"
 description = "Audio To Midi"
 readme = "README.md"
 authors = [
@@ -18,6 +18,7 @@ crepe_pitch_detector = ["librosa", "numpy","pretty_midi_fix","hmmlearn","tensorf
 violin_pitch_detector = ["librosa", "numpy","pretty_midi_fix","scipy","torchaudio","torch","mir_eval","huggingface_hub"]
 pop2piano = ["librosa", "numpy==1.26.4","pretty_midi_fix","transformers","essentia","torch","scipy","resampy","pretty_midi","huggingface_hub"]
 mt3_music_transcription = ["nest_asyncio", "mt3-audio2midi"]
+magenta_music_transcription = ["librosa","numpy","pretty_midi_fix","tensorflow","huggingface_hub"]
 all = ["librosa", "numpy==1.26.4","pretty_midi_fix","transformers","essentia","torch","scipy","torchaudio","torch","mir_eval","hmmlearn","tensorflow","keras","vamp","nnAudio","resampy","pretty_midi","huggingface_hub","nest_asyncio", "mt3-audio2midi"]

 [build-system]
--- /dev/null
+++ audio2midi/magenta_music_transcription.py
@@ -0,0 +1,76 @@
+from typing import Callable
+import numpy as np
+import pretty_midi_fix
+import tensorflow
+import librosa.feature.rhythm
+from huggingface_hub import hf_hub_download
+import librosa
+
+
+def endpitch(pitch, endFrame,volProb,intervals,MIN_NOTE_FRAMES,frameLenSecs,PITCH_BEND_ENABLED,pitch_bend_steps,pitch_bend_depth,track):
+    startFrame = intervals[pitch]
+    if endFrame - startFrame < MIN_NOTE_FRAMES:
+        return
+    vol = volProb[startFrame, pitch]
+    if vol < 0 or vol > 1:
+        return
+    start_time = startFrame * frameLenSecs
+    track.notes.append(pretty_midi_fix.Note(velocity=int(max(0, min(1, vol)) * 80 + 10), pitch=pitch + 21, start=start_time, end=endFrame * frameLenSecs))
+    if PITCH_BEND_ENABLED:
+        for step in range(pitch_bend_steps):
+            track.pitch_bends.append(pretty_midi_fix.PitchBend(pitch=int(np.sin(np.pi * step / (pitch_bend_steps - 1)) * pitch_bend_depth), time=start_time + step * 0.01))
+        track.pitch_bends.append(pretty_midi_fix.PitchBend(pitch=0, time=start_time + 0.05)) # Reset
+    del intervals[pitch]
+
+def model_output_to_notes(model_output,onset_thresh,include_pitch_bends,min_note_len,gap_tolerance_frames,pitch_bend_depth,pitch_bend_steps):
+    actProb , onProb , offProb , volProb , tempo = model_output
+    midi = pretty_midi_fix.PrettyMIDI(initial_tempo=tempo)
+    track = pretty_midi_fix.Instrument(program=40)
+    frameLenSecs = librosa.frames_to_time(1, sr=16000)
+    intervals = {}
+    onsets = (onProb > onset_thresh).astype(np.int8)
+    frames = onsets | (actProb > onset_thresh).astype(np.int8)
+    for i, frame in enumerate(np.vstack([frames, np.zeros(frames.shape[1])])):
+        for pitch, active in enumerate(frame):
+            if active:
+                if pitch not in intervals:
+                    if onsets is None or onsets[i, pitch]:
+                        intervals[pitch] = i
+                elif onsets is not None and onsets[i, pitch] and (i - intervals[pitch] > 2):
+                    endpitch(pitch, i,volProb,intervals,min_note_len,frameLenSecs,include_pitch_bends,pitch_bend_steps,pitch_bend_depth,track)
+                    intervals[pitch] = i
+            elif pitch in intervals:
+                if i + gap_tolerance_frames < frames.shape[0] and np.any(frames[i:i + gap_tolerance_frames, pitch]):
+                    continue # Don't end the note yet
+                endpitch(pitch, i,volProb,intervals,min_note_len,frameLenSecs,include_pitch_bends,pitch_bend_steps,pitch_bend_depth,track)
+    midi.instruments.append(track)
+    return midi
+
+class Magenta:
+    def __init__(self,model_path=hf_hub_download("shethjenil/Audio2Midi_Models","magenta.tflite")):
+        self.interp = tensorflow.lite.Interpreter(model_path=model_path)
+        self.interp.allocate_tensors()
+        self.inputLen = self.interp.get_input_details()[0]['shape'][0]
+        self.outputStep = self.interp.get_output_details()[0]['shape'][1] * 512
+
+    def run_inference(self,audio_path,progress_callback):
+        song = librosa.load(audio_path,sr=16000)[0]
+        actProb, onProb, offProb, volProb = np.empty((1, 88)), np.empty((1, 88)), np.empty((1, 88)), np.empty((1, 88))
+        paddedSong = np.append(song, np.zeros(-(song.size - self.inputLen) % self.outputStep, dtype=np.float32))
+        total_size = (paddedSong.size - self.inputLen) // self.outputStep + 1
+        tempo = librosa.feature.rhythm.tempo(y=song, sr=16000).mean()
+        for i in range(total_size):
+            self.interp.set_tensor(self.interp.get_input_details()[0]['index'], paddedSong[i * self.outputStep : i * self.outputStep + self.inputLen])
+            self.interp.invoke()
+            actProb = np.vstack((actProb, self.interp.get_tensor(self.interp.get_output_details()[0]['index'])[0]))
+            onProb = np.vstack(( onProb, self.interp.get_tensor(self.interp.get_output_details()[1]['index'])[0]))
+            offProb = np.vstack((offProb, self.interp.get_tensor(self.interp.get_output_details()[2]['index'])[0]))
+            volProb = np.vstack((volProb, self.interp.get_tensor(self.interp.get_output_details()[3]['index'])[0]))
+            if progress_callback:
+                progress_callback(i,total_size)
+        return actProb , onProb , offProb , volProb , tempo
+
+
+    def predict(self,audio,onset_thresh=0,min_note_len=3,gap_tolerance_frames = 3,pitch_bend_depth = 1500,pitch_bend_steps = 4,include_pitch_bends=True,progress_callback: Callable[[int, int], None] = None,output_file="output.mid"):
+        model_output_to_notes(self.run_inference(audio,progress_callback),onset_thresh = onset_thresh,min_note_len = min_note_len,include_pitch_bends = include_pitch_bends,pitch_bend_depth=pitch_bend_depth,pitch_bend_steps=pitch_bend_steps,gap_tolerance_frames=gap_tolerance_frames).write(output_file)
+        return output_file
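
Read together, the README and module hunks above are enough to sketch how the new `Magenta` class is used on its own. The constructor, the `predict()` signature, and its defaults are quoted from the new file; the audio file name, output name, and progress callback below are illustrative only:

```python
from audio2midi.magenta_music_transcription import Magenta

# The constructor's default model_path downloads magenta.tflite from the
# shethjenil/Audio2Midi_Models repo on the Hugging Face Hub on first use.
transcriber = Magenta()

def report(done: int, total: int) -> None:
    # Called once per inference chunk: run_inference() pads the 16 kHz audio,
    # then slides a window of inputLen samples forward by outputStep samples.
    print(f"chunk {done + 1} of {total}")

midi_path = transcriber.predict(
    "song.wav",               # placeholder path; librosa resamples it to 16 kHz
    onset_thresh=0.5,         # predict() defaults to 0; 0.5 matches the Gradio demo
    min_note_len=3,           # minimum note length, counted in model frames
    gap_tolerance_frames=3,   # brief dropouts are bridged instead of ending the note
    include_pitch_bends=True, # adds the sine-shaped bend ramp from endpitch()
    progress_callback=report,
    output_file="song.mid",
)
print(midi_path)  # predict() writes the MIDI file and returns output_file
```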