audio2midi 0.3.0__py2.py3-none-any.whl → 0.5.0__py2.py3-none-any.whl
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- audio2midi/crepe_pitch_detector.py +874 -47
- audio2midi/crepe_pitch_detector_tf.py +133 -0
- audio2midi/librosa_pitch_detector.py +3 -3
- audio2midi/mt3_music_transcription.py +1 -0
- audio2midi-0.5.0.dist-info/METADATA +224 -0
- audio2midi-0.5.0.dist-info/RECORD +13 -0
- audio2midi-0.3.0.dist-info/METADATA +0 -106
- audio2midi-0.3.0.dist-info/RECORD +0 -11
- {audio2midi-0.3.0.dist-info → audio2midi-0.5.0.dist-info}/WHEEL +0 -0
audio2midi/crepe_pitch_detector_tf.py (new file)

@@ -0,0 +1,133 @@
+from keras.layers import Input, Reshape, Conv2D, BatchNormalization
+from keras.layers import MaxPool2D, Dropout, Permute, Flatten, Dense
+from keras.models import Model
+from keras.callbacks import Callback
+from hmmlearn.hmm import CategoricalHMM
+from math import ceil as math_ceil
+from typing import Callable
+from numpy.lib.stride_tricks import as_strided
+from librosa import load as librosa_load
+from pretty_midi_fix import PrettyMIDI, PitchBend, Note, Instrument
+import numpy as np
+from huggingface_hub import hf_hub_download
+
+class PredictProgressCallback(Callback):
+    def __init__(self, total_batches, progress_callback: Callable[[int, int], None] = None):
+        super().__init__()
+        self.total_batches = total_batches
+        self.progress_callback = progress_callback
+    def on_predict_begin(self, logs=None):
+        if self.progress_callback:
+            self.progress_callback(0, self.total_batches)
+    def on_predict_batch_end(self, batch, logs=None):
+        if self.progress_callback:
+            self.progress_callback(batch, self.total_batches)
+    def on_predict_end(self, logs=None):
+        if self.progress_callback:
+            self.progress_callback(self.total_batches, self.total_batches)
+
+
+class CrepeTF():
+    def __init__(self, model_type="full", model_path=None):
+        if not model_path:
+            model_path = hf_hub_download("shethjenil/Audio2Midi_Models", f"crepe_{model_type}.h5")
+        model_type_importance = {'tiny': 4, 'small': 8, 'medium': 16, 'large': 24, 'full': 32}[model_type]
+        filters = [n * model_type_importance for n in [32, 4, 4, 4, 8, 16]]
+        widths = [512, 64, 64, 64, 64, 64]
+        strides = [(4, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1)]
+        x = Input(shape=(1024,), name='input', dtype='float32')
+        y = Reshape(target_shape=(1024, 1, 1), name='input-reshape')(x)
+        layers = [1, 2, 3, 4, 5, 6]
+        for l, f, w, s in zip(layers, filters, widths, strides):
+            y = Conv2D(f, (w, 1), strides=s, padding='same', activation='relu', name="conv%d" % l)(y)
+            y = BatchNormalization(name="conv%d-BN" % l)(y)
+            y = MaxPool2D(pool_size=(2, 1), strides=None, padding='valid', name="conv%d-maxpool" % l)(y)
+            y = Dropout(0.25, name="conv%d-dropout" % l)(y)
+        y = Permute((2, 1, 3), name="transpose")(y)
+        y = Flatten(name="flatten")(y)
+        y = Dense(360, activation='sigmoid', name="classifier")(y)
+        self.model = Model(inputs=x, outputs=y)
+        self.model.load_weights(model_path)
+        self.model.compile('adam', 'binary_crossentropy')
+        self.cents_mapping = (np.linspace(0, 7180, 360) + 1997.3794084376191)
+
+    def to_local_average_cents(self, salience, center=None):
+        if salience.ndim == 1:
+            if center is None:
+                center = int(np.argmax(salience))
+            start = max(0, center - 4)
+            end = min(len(salience), center + 5)
+            salience = salience[start:end]
+            product_sum = np.sum(salience * self.cents_mapping[start:end])
+            weight_sum = np.sum(salience)
+            return product_sum / weight_sum
+        if salience.ndim == 2:
+            return np.array([self.to_local_average_cents(salience[i, :]) for i in range(salience.shape[0])])
+        raise Exception("label should be either 1d or 2d ndarray")
+
+    def to_viterbi_cents(self, salience):
+        starting = np.ones(360) / 360
+        xx, yy = np.meshgrid(range(360), range(360))
+        transition = np.maximum(12 - abs(xx - yy), 0)
+        transition = transition / np.sum(transition, axis=1)[:, None]
+        self_emission = 0.1
+        emission = (np.eye(360) * self_emission + np.ones(shape=(360, 360)) * ((1 - self_emission) / 360))
+        model = CategoricalHMM(360, starting, transition)
+        model.startprob_, model.transmat_, model.emissionprob_ = starting, transition, emission
+        observations = np.argmax(salience, axis=1)
+        path = model.predict(observations.reshape(-1, 1), [len(observations)])
+        return np.array([self.to_local_average_cents(salience[i, :], path[i]) for i in range(len(observations))])
+
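The `to_viterbi_cents` helper above follows the smoothing scheme of the original CREPE implementation: a 360-state categorical HMM whose transition matrix assigns zero probability to jumps between distant pitch bins, so single-frame outliers cannot survive decoding. A minimal, self-contained sketch of the same construction (the 5-bin alphabet, the narrower transition window, and the observation sequence are made up for illustration):

```python
import numpy as np
from hmmlearn.hmm import CategoricalHMM

n_bins = 5
starting = np.ones(n_bins) / n_bins
xx, yy = np.meshgrid(range(n_bins), range(n_bins))
transition = np.maximum(3 - abs(xx - yy), 0).astype(float)  # zero probability for distant jumps
transition /= transition.sum(axis=1)[:, None]
self_emission = 0.1  # same self-emission weight as the code above
emission = np.eye(n_bins) * self_emission + np.ones((n_bins, n_bins)) * ((1 - self_emission) / n_bins)

# Same pattern as the package: construct, then set the parameters directly.
model = CategoricalHMM(n_bins)
model.startprob_, model.transmat_, model.emissionprob_ = starting, transition, emission

observations = np.array([1, 1, 4, 1, 2]).reshape(-1, 1)  # frame 2 is a noisy outlier
print(model.predict(observations, [len(observations)]))  # decoded path stays near bin 1
```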
audio2midi/crepe_pitch_detector_tf.py (continued)

+    def get_activation(self, audio: np.ndarray, center, step_size, progress_callback, batch_size):
+        if center:
+            audio = np.pad(audio, 512, mode='constant', constant_values=0)
+        hop_length = int(16000 * step_size / 1000)
+        n_frames = 1 + int((len(audio) - 1024) / hop_length)
+        frames = as_strided(audio, shape=(1024, n_frames), strides=(audio.itemsize, hop_length * audio.itemsize))
+        frames = frames.transpose().copy()
+        frames -= np.mean(frames, axis=1)[:, np.newaxis]
+        frames /= np.clip(np.std(frames, axis=1)[:, np.newaxis], 1e-8, None)
+        return self.model.predict(frames, batch_size, 0, callbacks=[PredictProgressCallback(math_ceil(len(frames) / batch_size), progress_callback)])
+
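`get_activation` slices the 16 kHz signal into overlapping 1024-sample windows spaced `step_size` milliseconds apart, using `as_strided` so nothing is copied until the explicit `.copy()`. A standalone sketch of the same layout (the buffer length is hypothetical; 160 samples is the hop the code computes for its default 10 ms step):

```python
import numpy as np
from numpy.lib.stride_tricks import as_strided

audio = np.arange(4096, dtype=np.float32)  # stand-in for 16 kHz audio
frame_len, hop_length = 1024, 160          # 160 samples = 10 ms at 16 kHz
n_frames = 1 + (len(audio) - frame_len) // hop_length
frames = as_strided(audio, shape=(frame_len, n_frames),
                    strides=(audio.itemsize, hop_length * audio.itemsize)).T

# Each row is a shifted view into the same buffer, with no data copied:
assert np.array_equal(frames[3], audio[3 * hop_length : 3 * hop_length + frame_len])
```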
audio2midi/crepe_pitch_detector_tf.py (continued)

+    def model_predict(self, audio: np.ndarray, viterbi, center, step_size, progress_callback, batch_size):
+        activation = self.get_activation(audio.astype(np.float32), center, step_size, progress_callback, batch_size)
+        confidence = activation.max(axis=1)
+        cents = self.to_viterbi_cents(activation) if viterbi else self.to_local_average_cents(activation)
+        frequency = 10 * 2 ** (cents / 1200)
+        frequency[np.isnan(frequency)] = 0
+        time = np.arange(confidence.shape[0]) * step_size / 1000.0
+        return time, frequency, confidence
+
+    def predict(self, audio_path, viterbi=False, center=True, step_size=10, min_confidence=0.8, batch_size=32, progress_callback: Callable[[int, int], None] = None, output_file="output.mid"):
+        time, frequency, confidence = self.model_predict(librosa_load(audio_path, sr=16000, mono=True)[0], viterbi, center, step_size, progress_callback, batch_size)
+        mask = confidence > min_confidence
+        times = time[mask]
+        frequencies = frequency[mask]
+        midi_floats = 69 + 12 * np.log2(frequencies / 440.0)
+        midi_notes = np.round(midi_floats).astype(int)
+        pitch_offsets = midi_floats - midi_notes  # in semitones
+        midi = PrettyMIDI()
+        instrument = Instrument(program=40)  # e.g., Violin for pitch bend demo
+        if len(times) > 0:
+            current_note = midi_notes[0]
+            note_start = times[0]
+            for i in range(1, len(times)):
+                if midi_notes[i] != current_note or i == len(times) - 1:
+                    note_end = times[i]
+                    if 0 <= current_note <= 127:
+                        note = Note(velocity=100, pitch=int(current_note), start=note_start, end=note_end)
+                        instrument.notes.append(note)
+                        seg_mask = (times >= note_start) & (times <= note_end)
+                        seg_times = times[seg_mask]
+                        seg_offsets = pitch_offsets[seg_mask]
+                        for t, offset in zip(seg_times, seg_offsets):
+                            # Assuming pitch bend range is +/- 2 semitones
+                            bend_value = int(offset / 2.0 * 8192)  # Scale to -8192 to +8191
+                            bend_value = np.clip(bend_value, -8192, 8191)
+                            pb = PitchBend(pitch=bend_value, time=t)
+                            instrument.pitch_bends.append(pb)
+                    current_note = midi_notes[i]
+                    note_start = times[i]
+        midi.instruments.append(instrument)
+        midi.write(output_file)
+        return output_file
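`predict` maps model output to MIDI in three steps: cents (relative to a 10 Hz reference) to frequency, frequency to a fractional MIDI note number, and the sub-semitone residual to a 14-bit pitch-bend value under the assumed ±2 semitone bend range. A worked sketch with one illustrative estimate:

```python
import numpy as np

cents = 5700.0                                     # one CREPE-style pitch estimate
frequency = 10 * 2 ** (cents / 1200)               # ≈ 269.1 Hz (0 cents = 10 Hz reference)
midi_float = 69 + 12 * np.log2(frequency / 440.0)  # ≈ 60.49, slightly sharp of C4
note = int(np.round(midi_float))                   # 60, the pitch written to the Note
offset = midi_float - note                         # ≈ +0.49 semitones of residual detune
bend = int(np.clip(offset / 2.0 * 8192, -8192, 8191))  # ≈ 1993 on the 14-bit bend scale
```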
audio2midi/librosa_pitch_detector.py

@@ -40,7 +40,7 @@ class Normal_Pitch_Det:
         midi_sequence = self.clean_midi_sequence(self.smooth_pitch_sequence(pitches, magnitudes, threshold), min_note_length)
         time_per_frame = audio_duration / len(midi_sequence)
         pm = PrettyMIDI(initial_tempo=tempo_bpm)
-        instrument = Instrument(program=
+        instrument = Instrument(program=40)
         last_note = None
         start_time = 0
         for i, note in enumerate(midi_sequence):
@@ -135,13 +135,13 @@ class Guitar_Pitch_Det:
         # Process all segments
         notes_data = [self.estimate_segment_note(cqt_db, boundaries, i, sr, tempo_bpm, threshold_db, round_to_sixteenth) for i in range(len(boundaries) - 1)]
         pm = PrettyMIDI(initial_tempo=tempo_bpm)
-
+        instrument = Instrument(program=40)
         note_time = 0.0
         for (pitch, duration, velocity) in notes_data:
             if pitch is not None:
                 # Convert duration in beats to duration in seconds for PrettyMIDI
                 duration_sec = duration * (60 / tempo_bpm)
-
+                instrument.notes.append(Note(velocity, pitch, note_time, note_time + duration_sec))
                 note_time += duration_sec  # Increment note_time by duration in seconds
             else:
                 # If it's a rest, just advance the time
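These hunks repair a statement that was left truncated in 0.3.0 (`Instrument(program=`) and give both detectors a concrete default instrument. pretty_midi-style program numbers are zero-indexed General MIDI patches, so `program=40` is GM patch 41, Violin; a quick check against upstream `pretty_midi` (assuming `pretty_midi_fix` mirrors its numbering):

```python
import pretty_midi

# Program numbers are zero-indexed GM patches: 0 is Acoustic Grand Piano, 40 is Violin.
print(pretty_midi.program_to_instrument_name(40))  # "Violin"
```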
audio2midi/mt3_music_transcription.py (new file)

@@ -0,0 +1 @@
+from mt3_audio2midi import MT3
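The new module is a single re-export, so the MT3 checkpoint still has to be fetched and unpacked separately. A usage sketch assembled from the README below (repo ID and archive name as published there):

```python
import nest_asyncio
from shutil import unpack_archive
from huggingface_hub import hf_hub_download
from audio2midi.mt3_music_transcription import MT3

nest_asyncio.apply()  # the README applies nest_asyncio before using MT3
unpack_archive(hf_hub_download("shethjenil/Audio2Midi_Models", "mt3.zip"), "mt3_model", format="zip")
MT3("mt3_model").predict("audio.mp3")
```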
audio2midi-0.5.0.dist-info/METADATA (new file)

@@ -0,0 +1,224 @@
+Metadata-Version: 2.4
+Name: audio2midi
+Version: 0.5.0
+Summary: Audio To Midi
+Author-email: dummyjenil <dummyjenil@gmail.com>
+Provides-Extra: all
+Requires-Dist: essentia; extra == 'all'
+Requires-Dist: hmmlearn; extra == 'all'
+Requires-Dist: huggingface-hub; extra == 'all'
+Requires-Dist: keras; extra == 'all'
+Requires-Dist: librosa; extra == 'all'
+Requires-Dist: mir-eval; extra == 'all'
+Requires-Dist: mt3-audio2midi; extra == 'all'
+Requires-Dist: nest-asyncio; extra == 'all'
+Requires-Dist: nnaudio; extra == 'all'
+Requires-Dist: numpy==1.26.4; extra == 'all'
+Requires-Dist: pretty-midi; extra == 'all'
+Requires-Dist: pretty-midi-fix; extra == 'all'
+Requires-Dist: resampy; extra == 'all'
+Requires-Dist: scipy; extra == 'all'
+Requires-Dist: tensorflow; extra == 'all'
+Requires-Dist: torch; extra == 'all'
+Requires-Dist: torchaudio; extra == 'all'
+Requires-Dist: transformers; extra == 'all'
+Requires-Dist: vamp; extra == 'all'
+Provides-Extra: basic-pitch-pitch-detector
+Requires-Dist: huggingface-hub; extra == 'basic-pitch-pitch-detector'
+Requires-Dist: librosa; extra == 'basic-pitch-pitch-detector'
+Requires-Dist: nnaudio; extra == 'basic-pitch-pitch-detector'
+Requires-Dist: numpy; extra == 'basic-pitch-pitch-detector'
+Requires-Dist: pretty-midi-fix; extra == 'basic-pitch-pitch-detector'
+Requires-Dist: scipy; extra == 'basic-pitch-pitch-detector'
+Requires-Dist: torch; extra == 'basic-pitch-pitch-detector'
+Provides-Extra: crepe-pitch-detector
+Requires-Dist: hmmlearn; extra == 'crepe-pitch-detector'
+Requires-Dist: huggingface-hub; extra == 'crepe-pitch-detector'
+Requires-Dist: librosa; extra == 'crepe-pitch-detector'
+Requires-Dist: numpy; extra == 'crepe-pitch-detector'
+Requires-Dist: pretty-midi-fix; extra == 'crepe-pitch-detector'
+Requires-Dist: tensorflow; extra == 'crepe-pitch-detector'
+Requires-Dist: torch; extra == 'crepe-pitch-detector'
+Requires-Dist: tqdm; extra == 'crepe-pitch-detector'
+Provides-Extra: crepe-pitch-detector-tf
+Requires-Dist: hmmlearn; extra == 'crepe-pitch-detector-tf'
+Requires-Dist: huggingface-hub; extra == 'crepe-pitch-detector-tf'
+Requires-Dist: keras; extra == 'crepe-pitch-detector-tf'
+Requires-Dist: librosa; extra == 'crepe-pitch-detector-tf'
+Requires-Dist: numpy; extra == 'crepe-pitch-detector-tf'
+Requires-Dist: pretty-midi-fix; extra == 'crepe-pitch-detector-tf'
+Requires-Dist: tensorflow; extra == 'crepe-pitch-detector-tf'
+Provides-Extra: librosa-pitch-detector
+Requires-Dist: librosa; extra == 'librosa-pitch-detector'
+Requires-Dist: numpy; extra == 'librosa-pitch-detector'
+Requires-Dist: pretty-midi-fix; extra == 'librosa-pitch-detector'
+Provides-Extra: melodia-pitch-detector
+Requires-Dist: huggingface-hub; extra == 'melodia-pitch-detector'
+Requires-Dist: librosa; extra == 'melodia-pitch-detector'
+Requires-Dist: numpy; extra == 'melodia-pitch-detector'
+Requires-Dist: pretty-midi-fix; extra == 'melodia-pitch-detector'
+Requires-Dist: scipy; extra == 'melodia-pitch-detector'
+Requires-Dist: vamp; extra == 'melodia-pitch-detector'
+Provides-Extra: mt3-music-transcription
+Requires-Dist: mt3-audio2midi; extra == 'mt3-music-transcription'
+Requires-Dist: nest-asyncio; extra == 'mt3-music-transcription'
+Provides-Extra: pop2piano
+Requires-Dist: essentia; extra == 'pop2piano'
+Requires-Dist: huggingface-hub; extra == 'pop2piano'
+Requires-Dist: librosa; extra == 'pop2piano'
+Requires-Dist: numpy==1.26.4; extra == 'pop2piano'
+Requires-Dist: pretty-midi; extra == 'pop2piano'
+Requires-Dist: pretty-midi-fix; extra == 'pop2piano'
+Requires-Dist: resampy; extra == 'pop2piano'
+Requires-Dist: scipy; extra == 'pop2piano'
+Requires-Dist: torch; extra == 'pop2piano'
+Requires-Dist: transformers; extra == 'pop2piano'
+Provides-Extra: violin-pitch-detector
+Requires-Dist: huggingface-hub; extra == 'violin-pitch-detector'
+Requires-Dist: librosa; extra == 'violin-pitch-detector'
+Requires-Dist: mir-eval; extra == 'violin-pitch-detector'
+Requires-Dist: numpy; extra == 'violin-pitch-detector'
+Requires-Dist: pretty-midi-fix; extra == 'violin-pitch-detector'
+Requires-Dist: scipy; extra == 'violin-pitch-detector'
+Requires-Dist: torch; extra == 'violin-pitch-detector'
+Requires-Dist: torchaudio; extra == 'violin-pitch-detector'
+Description-Content-Type: text/markdown
+
+[Audio2Midi Demo](https://huggingface.co/spaces/shethjenil/Audio2Midi)
+---
+
+[Github](https://github.com/dummyjenil/audio2midi)
+---
+
+```bash
+pip install audio2midi[all] audio2midi[pop2piano] audio2midi[violin_pitch_detector] audio2midi[crepe_pitch_detector] audio2midi[crepe_pitch_detector_tf] audio2midi[melodia_pitch_detector] audio2midi[basic_pitch_pitch_detector] audio2midi[librosa_pitch_detector]
+```
+---
+
+violin_model_capacity crepe_model_capacity
+
+* tiny
+* small
+* medium
+* large
+* full
+---
+
+``` python
+from audio2midi.librosa_pitch_detector import Normal_Pitch_Det, Guitar_Pitch_Det
+
+audio_path = "audio.mp3"
+Normal_Pitch_Det().predict(audio_path)
+Guitar_Pitch_Det().predict(audio_path)
+```
+
+---
+
+``` python
+from os import environ
+from huggingface_hub import hf_hub_download
+from shutil import unpack_archive
+from pathlib import Path
+from audio2midi.melodia_pitch_detector import Melodia
+from platform import system as platform_system, architecture as platform_architecture
+
+import nest_asyncio
+from audio2midi.mt3_music_transcription import MT3
+nest_asyncio.apply()
+unpack_archive(hf_hub_download("shethjenil/Audio2Midi_Models", "mt3.zip"), "mt3_model", format="zip")
+MT3("mt3_model").predict(audio_path)
+
+unpack_archive(hf_hub_download("shethjenil/Audio2Midi_Models", f"melodia_vamp_plugin_{'win' if (system := platform_system()) == 'Windows' else 'mac' if system == 'Darwin' else 'linux64' if (arch := platform_architecture()[0]) == '64bit' else 'linux32' if arch == '32bit' else None}.zip"), "vamp_melodia", format="zip")
+environ['VAMP_PATH'] = str(Path("vamp_melodia").absolute())
+Melodia().predict(audio_path)
+```
+
+---
+
+```python
+from audio2midi.basic_pitch_pitch_detector import BasicPitch
+from audio2midi.crepe_pitch_detector import Crepe
+from audio2midi.violin_pitch_detector import Violin_Pitch_Det
+from audio2midi.pop2piano import Pop2Piano
+from torch import device as Device
+from torch.cuda import is_available as cuda_is_available
+device = Device("cuda" if cuda_is_available() else "cpu")
+Crepe().predict(audio_path)
+Pop2Piano(device=device).predict(audio_path)
+Violin_Pitch_Det(device=device).predict(audio_path)
+BasicPitch(device=device).predict(audio_path)
+```
+
+---
+
+```python
+from audio2midi.basic_pitch_pitch_detector import BasicPitch
+from audio2midi.crepe_pitch_detector_tf import CrepeTF
+from audio2midi.crepe_pitch_detector import Crepe
+from audio2midi.librosa_pitch_detector import Normal_Pitch_Det, Guitar_Pitch_Det
+from audio2midi.melodia_pitch_detector import Melodia
+from audio2midi.pop2piano import Pop2Piano
+from audio2midi.violin_pitch_detector import Violin_Pitch_Det
+from audio2midi.mt3_music_transcription import MT3
+from os import environ
+from huggingface_hub import hf_hub_download
+from shutil import unpack_archive
+from pathlib import Path
+from platform import system as platform_system, architecture as platform_architecture
+import nest_asyncio
+nest_asyncio.apply()
+
+unpack_archive(hf_hub_download("shethjenil/Audio2Midi_Models", f"melodia_vamp_plugin_{'win' if (system := platform_system()) == 'Windows' else 'mac' if system == 'Darwin' else 'linux64' if (arch := platform_architecture()[0]) == '64bit' else 'linux32' if arch == '32bit' else None}.zip"), "vamp_melodia", format="zip")
+unpack_archive(hf_hub_download("shethjenil/Audio2Midi_Models", "mt3.zip"), "mt3_model", format="zip")
+
+environ['VAMP_PATH'] = str(Path("vamp_melodia").absolute())
+
+from os import getenv
+from torch import device as Device
+from torch.cuda import is_available as cuda_is_available
+device = Device("cuda" if cuda_is_available() else "cpu")
+
+import gradio as gr
+with gr.Blocks() as midi_viz_ui:
+    midi = gr.File(label="Upload MIDI")
+    sf = gr.File(label="Upload SoundFont")
+    output_html = gr.HTML(f'''
+    <div style="display: flex; justify-content: center; align-items: center;">
+        <iframe style="width: 100%; height: 500px;" src="https://shethjenil-midivizsf2.static.hf.space/index_single_file.html" id="midiviz"></iframe>
+    </div>''')
+    midi.upload(None, inputs=midi, js="""
+    async (file) => {
+        if (!file || !file.url || !file.orig_name) return;
+        const iframe = document.getElementById("midiviz");
+        iframe.contentWindow.postMessage({
+            type: "load-midi",
+            url: file.url,
+            name: file.orig_name
+        }, "*");
+    }
+    """)
+    sf.upload(None, inputs=sf, js="""
+    async (file) => {
+        if (!file || !file.url || !file.orig_name) return;
+        const iframe = document.getElementById("midiviz");
+        iframe.contentWindow.postMessage({
+            type: "load-sf",
+            url: file.url,
+            name: file.orig_name
+        }, "*");
+    }
+    """)
+
+gr.TabbedInterface([
+    gr.Interface(Normal_Pitch_Det().predict, [gr.Audio(type="filepath", label="Input Audio"), gr.Number(120, label="BPM"), gr.Number(512, label="HOP Len"), gr.Number(2, label="minimum note length"), gr.Number(0.1, label="threshold")], gr.File(label="Midi File")),
+    gr.Interface(Guitar_Pitch_Det().predict, [gr.Audio(type="filepath", label="Input Audio"), gr.Number(4, label="mag_exp"), gr.Number(-61, label="Threshold"), gr.Number(6, label="Pre_post_max"), gr.Checkbox(False, label="backtrack"), gr.Checkbox(False, label="round_to_sixteenth"), gr.Number(1024, label="hop_length"), gr.Number(72, label="n_bins"), gr.Number(12, label="bins_per_octave")], gr.File(label="Midi File")),
+    gr.Interface(Melodia().predict, [gr.Audio(type="filepath", label="Input Audio"), gr.Number(120, label="BPM", step=30), gr.Number(0.25, label="smoothness", step=0.05, info="Smooth the pitch sequence with a median filter of the provided duration (in seconds)."), gr.Number(0.1, label="minimum duration", step=0.1, info="Minimum allowed duration for note (in seconds). Shorter notes will be removed."), gr.Number(128, label="HOP")], gr.File(label="Midi File")),
+    gr.Interface(BasicPitch(device=device).predict, [gr.Audio(type="filepath", label="Upload Audio"), gr.Number(0.5, label="onset_thresh", info="Minimum amplitude of an onset activation to be considered an onset."), gr.Number(0.3, label="frame_thresh", info="Minimum energy requirement for a frame to be considered present."), gr.Number(127.70, label="min_note_len", info="The minimum allowed note length in milliseconds."), gr.Number(120, label="midi_tempo"), gr.Checkbox(True, label="infer_onsets", info="add additional onsets when there are large differences in frame amplitudes."), gr.Checkbox(True, label="include_pitch_bends", info="include pitch bends."), gr.Checkbox(False, label="multiple_pitch_bends", info="allow overlapping notes in midi file to have pitch bends."), gr.Checkbox(True, label="melodia_trick", info="Use the melodia post-processing step.")], gr.File(label="Download Midi File")),
+    gr.Interface(Violin_Pitch_Det(device=device, model_capacity=getenv("violin_model_capacity", "full")).predict, [gr.Audio(label="Upload your Audio file", type="filepath"), gr.Number(32, label="Batch size"), gr.Radio(["spotify", "tiktok"], value="spotify", label="Post Processing"), gr.Checkbox(True, label="include_pitch_bends")], gr.File(label="Download MIDI file")),
+    gr.Interface(Crepe(getenv("crepe_model_capacity", "full")).predict, [gr.Audio(type="filepath", label="Input Audio"), gr.Checkbox(False, label="viterbi", info="Apply viterbi smoothing to the estimated pitch curve"), gr.Checkbox(True, label="center"), gr.Number(10, label="step size", info="The step size in milliseconds for running pitch estimation."), gr.Number(0.8, label="minimum confidence"), gr.Number(32, label="batch size")], gr.File(label="Midi File")),
+    gr.Interface(CrepeTF(getenv("crepe_model_capacity", "full")).predict, [gr.Audio(type="filepath", label="Input Audio"), gr.Checkbox(False, label="viterbi", info="Apply viterbi smoothing to the estimated pitch curve"), gr.Checkbox(True, label="center"), gr.Number(10, label="step size", info="The step size in milliseconds for running pitch estimation."), gr.Number(0.8, label="minimum confidence"), gr.Number(32, label="batch size")], gr.File(label="Midi File")),
+    gr.Interface(Pop2Piano(device).predict, [gr.Audio(label="Input Audio", type="filepath"), gr.Number(1, minimum=1, maximum=21, label="Composer"), gr.Number(2, label="Details in Piano"), gr.Number(1, label="Efficiency of Piano"), gr.Radio([1, 2, 4], label="steps per beat", value=2)], gr.File(label="MIDI File")),
+    gr.Interface(MT3(str(Path("mt3_model").absolute())).predict, [gr.Audio(label="Input Audio", type="filepath"), gr.Number(0, label="seed")], gr.File(label="MIDI File")),
+    midi_viz_ui
+], ["Normal Pitch Detection", "Guitar Based Pitch Detection", "Melodia", "Spotify Pitch Detection", "Violin Based Pitch Detection", "Crepe Pitch Detection", "Crepe Pitch Detection TF", "Pop2Piano", "MT3", "Midi Vizulizer"]).launch()
+```
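Note that the README's install command uses underscore spellings (`crepe_pitch_detector_tf`) while the metadata declares hyphenated extras (`crepe-pitch-detector-tf`). With a modern pip this is fine: extras names are normalized (PEP 685), so both spellings resolve to the same extra. A sketch of the normalization rule using the `packaging` library:

```python
from packaging.utils import canonicalize_name

# Both spellings collapse to the canonical, hyphenated extra name.
print(canonicalize_name("crepe_pitch_detector_tf"))   # crepe-pitch-detector-tf
print(canonicalize_name("crepe-pitch-detector-tf"))   # crepe-pitch-detector-tf
```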
audio2midi-0.5.0.dist-info/RECORD (new file)

@@ -0,0 +1,13 @@
+audio2midi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+audio2midi/basic_pitch_pitch_detector.py,sha256=6ZH4SKL9qAj6ODSr84qr_QmNvDYOuZRUrbVCOB9430E,30663
+audio2midi/crepe_pitch_detector.py,sha256=63PLW5pQ_WSlIr_bOSAv7wjIujA06Iy-pTQab3InPR4,34675
+audio2midi/crepe_pitch_detector_tf.py,sha256=obOK9zWeAFWKhVuZAXG-SM2Cn-vVBjQMU08hwwjDvKA,7607
+audio2midi/librosa_pitch_detector.py,sha256=btwYkNqVgkf9rdkBtH2Q5DqdNVKtO9iZ88SvcbQK1jk,7777
+audio2midi/melodia_pitch_detector.py,sha256=YFt9NKyZ_Dyt_3ltInUz6QZgovmmPK6gR9EPlL_aV5Y,2402
+audio2midi/mt3_music_transcription.py,sha256=_8MgMoXd-LNzI4GeXwIJs6pq7B7lrEkzOQwQSPTo1wo,32
+audio2midi/pop2piano.py,sha256=kBAF1kY-5Ctu92etNLo4Clr1hkW1B5OCvd-XT7SrI8g,125685
+audio2midi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+audio2midi/violin_pitch_detector.py,sha256=C0--R0NaYcIyAFELGgebbs3yjAnQeVdtpk4iZ2NbUIA,61650
+audio2midi-0.5.0.dist-info/METADATA,sha256=V9B8lUncLjTxdWvM25lY2RCTTmqTwagsEOrjGP2o7z8,12857
+audio2midi-0.5.0.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
+audio2midi-0.5.0.dist-info/RECORD,,
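Each RECORD row is `path,sha256=<digest>,<size>`, where the digest is an unpadded urlsafe-base64 SHA-256 hash and the size is in bytes. A sketch of how a row can be reproduced for an installed file (the helper name is ours, not part of any package):

```python
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # Rebuild a RECORD row: urlsafe-base64 SHA-256 digest without '=' padding, then byte size.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"

# e.g. mt3_music_transcription.py is 32 bytes: the single import line plus a newline.
print(record_entry("audio2midi/mt3_music_transcription.py"))
```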
audio2midi-0.3.0.dist-info/METADATA (removed)

@@ -1,106 +0,0 @@
-Metadata-Version: 2.4
-Name: audio2midi
-Version: 0.3.0
-Summary: Audio To Midi
-Author-email: dummyjenil <dummyjenil@gmail.com>
-Provides-Extra: all
-Requires-Dist: essentia; extra == 'all'
-Requires-Dist: hmmlearn; extra == 'all'
-Requires-Dist: huggingface-hub; extra == 'all'
-Requires-Dist: keras; extra == 'all'
-Requires-Dist: librosa; extra == 'all'
-Requires-Dist: mir-eval; extra == 'all'
-Requires-Dist: nnaudio; extra == 'all'
-Requires-Dist: numpy==1.26.4; extra == 'all'
-Requires-Dist: pretty-midi; extra == 'all'
-Requires-Dist: pretty-midi-fix; extra == 'all'
-Requires-Dist: resampy; extra == 'all'
-Requires-Dist: scipy; extra == 'all'
-Requires-Dist: tensorflow; extra == 'all'
-Requires-Dist: torch; extra == 'all'
-Requires-Dist: torchaudio; extra == 'all'
-Requires-Dist: transformers; extra == 'all'
-Requires-Dist: vamp; extra == 'all'
-Provides-Extra: basic-pitch-pitch-detector
-Requires-Dist: huggingface-hub; extra == 'basic-pitch-pitch-detector'
-Requires-Dist: librosa; extra == 'basic-pitch-pitch-detector'
-Requires-Dist: nnaudio; extra == 'basic-pitch-pitch-detector'
-Requires-Dist: numpy; extra == 'basic-pitch-pitch-detector'
-Requires-Dist: pretty-midi-fix; extra == 'basic-pitch-pitch-detector'
-Requires-Dist: scipy; extra == 'basic-pitch-pitch-detector'
-Requires-Dist: torch; extra == 'basic-pitch-pitch-detector'
-Provides-Extra: crepe-pitch-detector
-Requires-Dist: hmmlearn; extra == 'crepe-pitch-detector'
-Requires-Dist: huggingface-hub; extra == 'crepe-pitch-detector'
-Requires-Dist: keras; extra == 'crepe-pitch-detector'
-Requires-Dist: librosa; extra == 'crepe-pitch-detector'
-Requires-Dist: numpy; extra == 'crepe-pitch-detector'
-Requires-Dist: pretty-midi-fix; extra == 'crepe-pitch-detector'
-Requires-Dist: tensorflow; extra == 'crepe-pitch-detector'
-Provides-Extra: librosa-pitch-detector
-Requires-Dist: librosa; extra == 'librosa-pitch-detector'
-Requires-Dist: numpy; extra == 'librosa-pitch-detector'
-Requires-Dist: pretty-midi-fix; extra == 'librosa-pitch-detector'
-Provides-Extra: melodia-pitch-detector
-Requires-Dist: huggingface-hub; extra == 'melodia-pitch-detector'
-Requires-Dist: librosa; extra == 'melodia-pitch-detector'
-Requires-Dist: numpy; extra == 'melodia-pitch-detector'
-Requires-Dist: pretty-midi-fix; extra == 'melodia-pitch-detector'
-Requires-Dist: scipy; extra == 'melodia-pitch-detector'
-Requires-Dist: vamp; extra == 'melodia-pitch-detector'
-Provides-Extra: pop2piano
-Requires-Dist: essentia; extra == 'pop2piano'
-Requires-Dist: huggingface-hub; extra == 'pop2piano'
-Requires-Dist: librosa; extra == 'pop2piano'
-Requires-Dist: numpy==1.26.4; extra == 'pop2piano'
-Requires-Dist: pretty-midi; extra == 'pop2piano'
-Requires-Dist: pretty-midi-fix; extra == 'pop2piano'
-Requires-Dist: resampy; extra == 'pop2piano'
-Requires-Dist: scipy; extra == 'pop2piano'
-Requires-Dist: torch; extra == 'pop2piano'
-Requires-Dist: transformers; extra == 'pop2piano'
-Provides-Extra: violin-pitch-detector
-Requires-Dist: huggingface-hub; extra == 'violin-pitch-detector'
-Requires-Dist: librosa; extra == 'violin-pitch-detector'
-Requires-Dist: mir-eval; extra == 'violin-pitch-detector'
-Requires-Dist: numpy; extra == 'violin-pitch-detector'
-Requires-Dist: pretty-midi-fix; extra == 'violin-pitch-detector'
-Requires-Dist: scipy; extra == 'violin-pitch-detector'
-Requires-Dist: torch; extra == 'violin-pitch-detector'
-Requires-Dist: torchaudio; extra == 'violin-pitch-detector'
-Description-Content-Type: text/markdown
-
-``` python
-from audio2midi.librosa_pitch_detector import Normal_Pitch_Det, Guitar_Pitch_Det
-
-audio_path = "audio.mp3"
-Normal_Pitch_Det().predict(audio_path)
-Guitar_Pitch_Det().predict(audio_path)
-```
----
-``` python
-from os import environ
-from huggingface_hub import hf_hub_download
-from shutil import unpack_archive
-from pathlib import Path
-from audio2midi.melodia_pitch_detector import Melodia
-from platform import system as platform_system, architecture as platform_architecture
-
-unpack_archive(hf_hub_download("shethjenil/Audio2Midi_Models", f"melodia_vamp_plugin_{'win' if (system := platform_system()) == 'Windows' else 'mac' if system == 'Darwin' else 'linux64' if (arch := platform_architecture()[0]) == '64bit' else 'linux32' if arch == '32bit' else None}.zip"), "vamp_melodia", format="zip")
-environ['VAMP_PATH'] = str(Path("vamp_melodia").absolute())
-Melodia().predict(audio_path)
-```
----
-```python
-from audio2midi.basic_pitch_pitch_detector import BasicPitch
-from audio2midi.crepe_pitch_detector import Crepe
-from audio2midi.violin_pitch_detector import Violin_Pitch_Det
-from audio2midi.pop2piano import Pop2Piano
-from torch import device as Device
-from torch.cuda import is_available as cuda_is_available
-device = Device("cuda" if cuda_is_available() else "cpu")
-Crepe().predict(audio_path)
-Pop2Piano(device=device).predict(audio_path)
-Violin_Pitch_Det(device=device).predict(audio_path)
-BasicPitch(device=device).predict(audio_path)
-```
audio2midi-0.3.0.dist-info/RECORD (removed)

@@ -1,11 +0,0 @@
-audio2midi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-audio2midi/basic_pitch_pitch_detector.py,sha256=6ZH4SKL9qAj6ODSr84qr_QmNvDYOuZRUrbVCOB9430E,30663
-audio2midi/crepe_pitch_detector.py,sha256=UgyN9oJOrRsWrbtsyD1r4OiX8StRg5o_HrMq0JDKj4o,7605
-audio2midi/librosa_pitch_detector.py,sha256=CdPDt72zYRjr3u7dVBdq3wrGoi96i2PV-LLvqWvHmpI,7790
-audio2midi/melodia_pitch_detector.py,sha256=YFt9NKyZ_Dyt_3ltInUz6QZgovmmPK6gR9EPlL_aV5Y,2402
-audio2midi/pop2piano.py,sha256=kBAF1kY-5Ctu92etNLo4Clr1hkW1B5OCvd-XT7SrI8g,125685
-audio2midi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-audio2midi/violin_pitch_detector.py,sha256=C0--R0NaYcIyAFELGgebbs3yjAnQeVdtpk4iZ2NbUIA,61650
-audio2midi-0.3.0.dist-info/METADATA,sha256=kgJtW1RMkRqLbO3Do2XNVNaTs0CvRjWii6R2trW6Afg,4887
-audio2midi-0.3.0.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
-audio2midi-0.3.0.dist-info/RECORD,,
audio2midi-0.5.0.dist-info/WHEEL: file without changes (identical sha256 in both RECORDs).