audio2midi 0.6.0__tar.gz → 0.7.0__tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: audio2midi
-Version: 0.6.0
+Version: 0.7.0
 Summary: Audio To Midi
 Author-email: dummyjenil <dummyjenil@gmail.com>
 Provides-Extra: all
@@ -52,6 +52,12 @@ Provides-Extra: librosa-pitch-detector
 Requires-Dist: librosa; extra == 'librosa-pitch-detector'
 Requires-Dist: numpy; extra == 'librosa-pitch-detector'
 Requires-Dist: pretty-midi-fix; extra == 'librosa-pitch-detector'
+Provides-Extra: magenta-music-transcription
+Requires-Dist: huggingface-hub; extra == 'magenta-music-transcription'
+Requires-Dist: librosa; extra == 'magenta-music-transcription'
+Requires-Dist: numpy; extra == 'magenta-music-transcription'
+Requires-Dist: pretty-midi-fix; extra == 'magenta-music-transcription'
+Requires-Dist: tensorflow; extra == 'magenta-music-transcription'
 Provides-Extra: melodia-pitch-detector
 Requires-Dist: huggingface-hub; extra == 'melodia-pitch-detector'
 Requires-Dist: librosa; extra == 'melodia-pitch-detector'
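
The new magenta-music-transcription extra can presumably be installed with the usual pip extras syntax, e.g. pip install "audio2midi[magenta-music-transcription]", pulling in huggingface-hub, librosa, numpy, pretty-midi-fix, and tensorflow as listed above.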

pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "audio2midi"
-version = "0.6.0"
+version = "0.7.0"
 description = "Audio To Midi"
 readme = "README.md"
 authors = [
@@ -18,6 +18,7 @@ crepe_pitch_detector = ["librosa", "numpy","pretty_midi_fix","hmmlearn","tensorf
 violin_pitch_detector = ["librosa", "numpy","pretty_midi_fix","scipy","torchaudio","torch","mir_eval","huggingface_hub"]
 pop2piano = ["librosa", "numpy==1.26.4","pretty_midi_fix","transformers","essentia","torch","scipy","resampy","pretty_midi","huggingface_hub"]
 mt3_music_transcription = ["nest_asyncio", "mt3-audio2midi"]
+magenta_music_transcription = ["librosa","numpy","pretty_midi_fix","tensorflow","huggingface_hub"]
 all = ["librosa", "numpy==1.26.4","pretty_midi_fix","transformers","essentia","torch","scipy","torchaudio","torch","mir_eval","hmmlearn","tensorflow","keras","vamp","nnAudio","resampy","pretty_midi","huggingface_hub","nest_asyncio", "mt3-audio2midi"]
 
 [build-system]

New file: Magenta music transcription module
@@ -0,0 +1,76 @@
+from typing import Callable
+import numpy as np
+import pretty_midi_fix
+import tensorflow
+import librosa.feature.rhythm
+from huggingface_hub import hf_hub_download
+import librosa
+
+
+def endpitch(pitch, endFrame,volProb,intervals,MIN_NOTE_FRAMES,frameLenSecs,PITCH_BEND_ENABLED,pitch_bend_steps,pitch_bend_depth,track):
+    startFrame = intervals[pitch]
+    if endFrame - startFrame < MIN_NOTE_FRAMES:
+        return
+    vol = volProb[startFrame, pitch]
+    if vol < 0 or vol > 1:
+        return
+    start_time = startFrame * frameLenSecs
+    track.notes.append(pretty_midi_fix.Note(velocity=int(max(0, min(1, vol)) * 80 + 10), pitch=pitch + 21, start=start_time, end=endFrame * frameLenSecs))
+    if PITCH_BEND_ENABLED:
+        for step in range(pitch_bend_steps):
+            track.pitch_bends.append(pretty_midi_fix.PitchBend(pitch=int(np.sin(np.pi * step / (pitch_bend_steps - 1)) * pitch_bend_depth), time=start_time + step * 0.01))
+        track.pitch_bends.append(pretty_midi_fix.PitchBend(pitch=0, time=start_time + 0.05)) # Reset
+    del intervals[pitch]
+
+def model_output_to_notes(model_output,onset_thresh,include_pitch_bends,min_note_len,gap_tolerance_frames,pitch_bend_depth,pitch_bend_steps):
+    actProb , onProb , offProb , volProb , tempo = model_output
+    midi = pretty_midi_fix.PrettyMIDI(initial_tempo=tempo)
+    track = pretty_midi_fix.Instrument(program=40)
+    frameLenSecs = librosa.frames_to_time(1, sr=16000)
+    intervals = {}
+    onsets = (onProb > onset_thresh).astype(np.int8)
+    frames = onsets | (actProb > onset_thresh).astype(np.int8)
+    for i, frame in enumerate(np.vstack([frames, np.zeros(frames.shape[1])])):
+        for pitch, active in enumerate(frame):
+            if active:
+                if pitch not in intervals:
+                    if onsets is None or onsets[i, pitch]:
+                        intervals[pitch] = i
+                elif onsets is not None and onsets[i, pitch] and (i - intervals[pitch] > 2):
+                    endpitch(pitch, i,volProb,intervals,min_note_len,frameLenSecs,include_pitch_bends,pitch_bend_steps,pitch_bend_depth,track)
+                    intervals[pitch] = i
+            elif pitch in intervals:
+                if i + gap_tolerance_frames < frames.shape[0] and np.any(frames[i:i + gap_tolerance_frames, pitch]):
+                    continue # Don't end the note yet
+                endpitch(pitch, i,volProb,intervals,min_note_len,frameLenSecs,include_pitch_bends,pitch_bend_steps,pitch_bend_depth,track)
+    midi.instruments.append(track)
+    return midi
+
+class Magenta:
+    def __init__(self,model_path=hf_hub_download("shethjenil/Audio2Midi_Models","magenta.tflite")):
+        self.interp = tensorflow.lite.Interpreter(model_path=model_path)
+        self.interp.allocate_tensors()
+        self.inputLen = self.interp.get_input_details()[0]['shape'][0]
+        self.outputStep = self.interp.get_output_details()[0]['shape'][1] * 512
+
+    def run_inference(self,audio_path,progress_callback):
+        song = librosa.load(audio_path,sr=16000)[0]
+        actProb, onProb, offProb, volProb = np.empty((1, 88)), np.empty((1, 88)), np.empty((1, 88)), np.empty((1, 88))
+        paddedSong = np.append(song, np.zeros(-(song.size - self.inputLen) % self.outputStep, dtype=np.float32))
+        total_size = (paddedSong.size - self.inputLen) // self.outputStep + 1
+        tempo = librosa.feature.rhythm.tempo(y=song, sr=16000).mean()
+        for i in range(total_size):
+            self.interp.set_tensor(self.interp.get_input_details()[0]['index'], paddedSong[i * self.outputStep : i * self.outputStep + self.inputLen])
+            self.interp.invoke()
+            actProb = np.vstack((actProb, self.interp.get_tensor(self.interp.get_output_details()[0]['index'])[0]))
+            onProb = np.vstack(( onProb, self.interp.get_tensor(self.interp.get_output_details()[1]['index'])[0]))
+            offProb = np.vstack((offProb, self.interp.get_tensor(self.interp.get_output_details()[2]['index'])[0]))
+            volProb = np.vstack((volProb, self.interp.get_tensor(self.interp.get_output_details()[3]['index'])[0]))
+            if progress_callback:
+                progress_callback(i,total_size)
+        return actProb , onProb , offProb , volProb , tempo
+
+
+    def predict(self,audio,onset_thresh=0,min_note_len=3,include_pitch_bends=True,gap_tolerance_frames = 3,pitch_bend_depth = 1500,pitch_bend_steps = 4,progress_callback: Callable[[int, int], None] = None,output_file="output.mid"):
+        model_output_to_notes(self.run_inference(audio,progress_callback),onset_thresh = onset_thresh,min_note_len = min_note_len,include_pitch_bends = include_pitch_bends,pitch_bend_depth=pitch_bend_depth,pitch_bend_steps=pitch_bend_steps,gap_tolerance_frames=gap_tolerance_frames).write(output_file)
+        return output_file
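
The diff does not show where the new module lives inside the package, but the added Magenta class could be exercised roughly as follows. This is a minimal sketch: the import path, the input filename, and the 0.5 onset threshold are assumptions for illustration; the class name, constructor, and predict() signature come from the code above.

# Usage sketch for the Magenta transcriber added in 0.7.0.
# The import path and filenames below are assumptions, not part of the diff.
from audio2midi.magenta_music_transcription import Magenta  # assumed module path

def report(done, total):
    # Optional progress callback, called once per inference window.
    print(f"window {done + 1}/{total}")

transcriber = Magenta()  # downloads magenta.tflite from shethjenil/Audio2Midi_Models
midi_path = transcriber.predict(
    "input.wav",                # assumed input audio file
    onset_thresh=0.5,           # example threshold; the package default is 0
    min_note_len=3,             # drop notes shorter than 3 frames
    include_pitch_bends=True,
    progress_callback=report,
    output_file="transcribed.mid",
)
print("Wrote", midi_path)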