audio2midi 0.6.0.tar.gz → 0.7.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {audio2midi-0.6.0 → audio2midi-0.7.0}/PKG-INFO +7 -1
- {audio2midi-0.6.0 → audio2midi-0.7.0}/pyproject.toml +2 -1
- audio2midi-0.7.0/src/audio2midi/magenta_music_transcription.py +76 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/.gitignore +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/.python-version +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/README.md +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/src/audio2midi/__init__.py +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/src/audio2midi/basic_pitch_pitch_detector.py +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/src/audio2midi/crepe_pitch_detector.py +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/src/audio2midi/crepe_pitch_detector_tf.py +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/src/audio2midi/librosa_pitch_detector.py +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/src/audio2midi/melodia_pitch_detector.py +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/src/audio2midi/mt3_music_transcription.py +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/src/audio2midi/pop2piano.py +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/src/audio2midi/py.typed +0 -0
- {audio2midi-0.6.0 → audio2midi-0.7.0}/src/audio2midi/violin_pitch_detector.py +0 -0
{audio2midi-0.6.0 → audio2midi-0.7.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: audio2midi
-Version: 0.6.0
+Version: 0.7.0
 Summary: Audio To Midi
 Author-email: dummyjenil <dummyjenil@gmail.com>
 Provides-Extra: all
@@ -52,6 +52,12 @@ Provides-Extra: librosa-pitch-detector
 Requires-Dist: librosa; extra == 'librosa-pitch-detector'
 Requires-Dist: numpy; extra == 'librosa-pitch-detector'
 Requires-Dist: pretty-midi-fix; extra == 'librosa-pitch-detector'
+Provides-Extra: magenta-music-transcription
+Requires-Dist: huggingface-hub; extra == 'magenta-music-transcription'
+Requires-Dist: librosa; extra == 'magenta-music-transcription'
+Requires-Dist: numpy; extra == 'magenta-music-transcription'
+Requires-Dist: pretty-midi-fix; extra == 'magenta-music-transcription'
+Requires-Dist: tensorflow; extra == 'magenta-music-transcription'
 Provides-Extra: melodia-pitch-detector
 Requires-Dist: huggingface-hub; extra == 'melodia-pitch-detector'
 Requires-Dist: librosa; extra == 'melodia-pitch-detector'
{audio2midi-0.6.0 → audio2midi-0.7.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "audio2midi"
-version = "0.6.0"
+version = "0.7.0"
 description = "Audio To Midi"
 readme = "README.md"
 authors = [
@@ -18,6 +18,7 @@ crepe_pitch_detector = ["librosa", "numpy","pretty_midi_fix","hmmlearn","tensorf
 violin_pitch_detector = ["librosa", "numpy","pretty_midi_fix","scipy","torchaudio","torch","mir_eval","huggingface_hub"]
 pop2piano = ["librosa", "numpy==1.26.4","pretty_midi_fix","transformers","essentia","torch","scipy","resampy","pretty_midi","huggingface_hub"]
 mt3_music_transcription = ["nest_asyncio", "mt3-audio2midi"]
+magenta_music_transcription = ["librosa","numpy","pretty_midi_fix","tensorflow","huggingface_hub"]
 all = ["librosa", "numpy==1.26.4","pretty_midi_fix","transformers","essentia","torch","scipy","torchaudio","torch","mir_eval","hmmlearn","tensorflow","keras","vamp","nnAudio","resampy","pretty_midi","huggingface_hub","nest_asyncio", "mt3-audio2midi"]

 [build-system]
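The magenta_music_transcription extra added here is what produces the Provides-Extra/Requires-Dist block in the PKG-INFO diff above. Assuming the usual normalization of extra names (pip treats the underscore and hyphen spellings as the same extra), it should be installable with `pip install "audio2midi[magenta-music-transcription]"`.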
audio2midi-0.7.0/src/audio2midi/magenta_music_transcription.py (new file)

@@ -0,0 +1,76 @@
+from typing import Callable
+import numpy as np
+import pretty_midi_fix
+import tensorflow
+import librosa.feature.rhythm
+from huggingface_hub import hf_hub_download
+import librosa
+
+
+def endpitch(pitch, endFrame,volProb,intervals,MIN_NOTE_FRAMES,frameLenSecs,PITCH_BEND_ENABLED,pitch_bend_steps,pitch_bend_depth,track):
+    startFrame = intervals[pitch]
+    if endFrame - startFrame < MIN_NOTE_FRAMES:
+        return
+    vol = volProb[startFrame, pitch]
+    if vol < 0 or vol > 1:
+        return
+    start_time = startFrame * frameLenSecs
+    track.notes.append(pretty_midi_fix.Note(velocity=int(max(0, min(1, vol)) * 80 + 10), pitch=pitch + 21, start=start_time, end=endFrame * frameLenSecs))
+    if PITCH_BEND_ENABLED:
+        for step in range(pitch_bend_steps):
+            track.pitch_bends.append(pretty_midi_fix.PitchBend(pitch=int(np.sin(np.pi * step / (pitch_bend_steps - 1)) * pitch_bend_depth), time=start_time + step * 0.01))
+        track.pitch_bends.append(pretty_midi_fix.PitchBend(pitch=0, time=start_time + 0.05)) # Reset
+    del intervals[pitch]
+
+def model_output_to_notes(model_output,onset_thresh,include_pitch_bends,min_note_len,gap_tolerance_frames,pitch_bend_depth,pitch_bend_steps):
+    actProb , onProb , offProb , volProb , tempo = model_output
+    midi = pretty_midi_fix.PrettyMIDI(initial_tempo=tempo)
+    track = pretty_midi_fix.Instrument(program=40)
+    frameLenSecs = librosa.frames_to_time(1, sr=16000)
+    intervals = {}
+    onsets = (onProb > onset_thresh).astype(np.int8)
+    frames = onsets | (actProb > onset_thresh).astype(np.int8)
+    for i, frame in enumerate(np.vstack([frames, np.zeros(frames.shape[1])])):
+        for pitch, active in enumerate(frame):
+            if active:
+                if pitch not in intervals:
+                    if onsets is None or onsets[i, pitch]:
+                        intervals[pitch] = i
+                elif onsets is not None and onsets[i, pitch] and (i - intervals[pitch] > 2):
+                    endpitch(pitch, i,volProb,intervals,min_note_len,frameLenSecs,include_pitch_bends,pitch_bend_steps,pitch_bend_depth,track)
+                    intervals[pitch] = i
+            elif pitch in intervals:
+                if i + gap_tolerance_frames < frames.shape[0] and np.any(frames[i:i + gap_tolerance_frames, pitch]):
+                    continue # Don't end the note yet
+                endpitch(pitch, i,volProb,intervals,min_note_len,frameLenSecs,include_pitch_bends,pitch_bend_steps,pitch_bend_depth,track)
+    midi.instruments.append(track)
+    return midi
+
+class Magenta:
+    def __init__(self,model_path=hf_hub_download("shethjenil/Audio2Midi_Models","magenta.tflite")):
+        self.interp = tensorflow.lite.Interpreter(model_path=model_path)
+        self.interp.allocate_tensors()
+        self.inputLen = self.interp.get_input_details()[0]['shape'][0]
+        self.outputStep = self.interp.get_output_details()[0]['shape'][1] * 512
+
+    def run_inference(self,audio_path,progress_callback):
+        song = librosa.load(audio_path,sr=16000)[0]
+        actProb, onProb, offProb, volProb = np.empty((1, 88)), np.empty((1, 88)), np.empty((1, 88)), np.empty((1, 88))
+        paddedSong = np.append(song, np.zeros(-(song.size - self.inputLen) % self.outputStep, dtype=np.float32))
+        total_size = (paddedSong.size - self.inputLen) // self.outputStep + 1
+        tempo = librosa.feature.rhythm.tempo(y=song, sr=16000).mean()
+        for i in range(total_size):
+            self.interp.set_tensor(self.interp.get_input_details()[0]['index'], paddedSong[i * self.outputStep : i * self.outputStep + self.inputLen])
+            self.interp.invoke()
+            actProb = np.vstack((actProb, self.interp.get_tensor(self.interp.get_output_details()[0]['index'])[0]))
+            onProb = np.vstack(( onProb, self.interp.get_tensor(self.interp.get_output_details()[1]['index'])[0]))
+            offProb = np.vstack((offProb, self.interp.get_tensor(self.interp.get_output_details()[2]['index'])[0]))
+            volProb = np.vstack((volProb, self.interp.get_tensor(self.interp.get_output_details()[3]['index'])[0]))
+            if progress_callback:
+                progress_callback(i,total_size)
+        return actProb , onProb , offProb , volProb , tempo
+
+
+    def predict(self,audio,onset_thresh=0,min_note_len=3,include_pitch_bends=True,gap_tolerance_frames = 3,pitch_bend_depth = 1500,pitch_bend_steps = 4,progress_callback: Callable[[int, int], None] = None,output_file="output.mid"):
+        model_output_to_notes(self.run_inference(audio,progress_callback),onset_thresh = onset_thresh,min_note_len = min_note_len,include_pitch_bends = include_pitch_bends,pitch_bend_depth=pitch_bend_depth,pitch_bend_steps=pitch_bend_steps,gap_tolerance_frames=gap_tolerance_frames).write(output_file)
+        return output_file
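For orientation, a minimal usage sketch of the new class. It assumes the package is installed with the new extra; "input.wav", the 0.5 threshold, and the callback are illustrative. Note that because model_path is a default argument of __init__, hf_hub_download fetches magenta.tflite from shethjenil/Audio2Midi_Models as soon as the module is imported:

    from audio2midi.magenta_music_transcription import Magenta

    def show_progress(done, total):
        # run_inference reports (chunk_index, chunk_count) after each TFLite invocation
        print(f"chunk {done + 1}/{total}")

    transcriber = Magenta()
    midi_path = transcriber.predict("input.wav", onset_thresh=0.5, progress_callback=show_progress)
    print(midi_path)  # "output.mid" unless output_file= is overridden

The decoder half is pure NumPy plus pretty_midi_fix, so it can be exercised without the TFLite model at all. A sketch under the same assumptions, feeding model_output_to_notes random probability grids in place of the real activation/onset/offset/volume output (the 0.9 threshold and 120 BPM are arbitrary):

    import numpy as np
    from audio2midi.magenta_music_transcription import model_output_to_notes

    rng = np.random.default_rng(0)
    # four (frames x 88 keys) grids standing in for actProb, onProb, offProb, volProb
    actProb, onProb, offProb, volProb = [rng.random((100, 88)) for _ in range(4)]
    midi = model_output_to_notes(
        (actProb, onProb, offProb, volProb, 120.0),
        onset_thresh=0.9,  # high threshold keeps the fake note count small
        include_pitch_bends=False,
        min_note_len=3,
        gap_tolerance_frames=3,
        pitch_bend_depth=1500,
        pitch_bend_steps=4,
    )
    print(len(midi.instruments[0].notes), "notes decoded")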
All other files listed above (+0 -0) are unchanged between 0.6.0 and 0.7.0.
|