rosabeats 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rosabeats/rosabeats.py ADDED
@@ -0,0 +1,996 @@
1
+ #!/usr/bin/env python
2
+
3
+ import re
4
+ import sys
5
+ import os.path
6
+ import random
7
+ import time
8
+ import joblib
9
+
10
+ # Optional imports for vamp and ffms2
11
+ try:
12
+ import vamp
13
+ VAMP_AVAILABLE = True
14
+ except ImportError:
15
+ VAMP_AVAILABLE = False
16
+ vamp = None
17
+
18
+ try:
19
+ import ffms2
20
+ FFMS2_AVAILABLE = True
21
+ except ImportError:
22
+ FFMS2_AVAILABLE = False
23
+ ffms2 = None
24
+
25
+ import numpy as np
26
+ import scipy
27
+ import sklearn
28
+ import librosa
29
+ import soundfile as sf
30
+ import sounddevice as sd
31
+
32
+ class rosabeats:
33
+ """A class for analyzing and manipulating audio files, particularly focused on beat tracking and segmentation.
34
+
35
+ This class provides functionality for:
36
+ - Loading and processing audio files
37
+ - Beat tracking and tempo analysis
38
+ - Audio segmentation
39
+ - Playback and remixing capabilities
40
+ - Beat and bar manipulation
41
+
42
+ Attributes:
43
+ debug (bool): Class-level debug flag for controlling debug output
44
+ ffms_source: FFMS2 audio source object
45
+ data: Audio data array
46
+ sr: Sample rate
47
+ channels: Number of audio channels
48
+ dtype: Data type of audio samples
49
+ mono: Mono version of audio data
50
+ beat_timings: Array of beat timings
51
+ tempo: Estimated tempo in BPM
52
+ beat_slices: List of beat slice boundaries
53
+ total_beats: Total number of beats detected
54
+ bars: Bar information
55
+ total_segments: Total number of segments
56
+ segments: List of segment information
57
+ beatsperbar: Number of beats per bar
58
+ firstfullbar: Index of first full bar
59
+ pulse_device: PulseAudio device index
60
+ stream: Audio output stream
61
+ remix: Remix buffer
62
+ remix_index: Current position in remix buffer
63
+ remix_output_file: Output file for remix
64
+ beats_output_file: Output file for beat information
65
+ beats_output: File handle for beat output
66
+ output_play: Flag for enabling playback
67
+ output_save: Flag for enabling saving
68
+ output_beats: Flag for enabling beat output
69
+ sourcefile: Path to source audio file
70
+ saved_features_enabled: Flag for saved features functionality
71
+ """
72
+
73
+ debug = False
74
+
75
@classmethod
def d_print(cls, *args, **kwargs):
    """Print a debug message when the class-level ``debug`` flag is set.

    The positional arguments are converted with ``str()`` and concatenated
    without a separator, then printed after a ``"-> "`` marker.

    Args:
        *args: Values that make up the message
        **kwargs: Keyword arguments forwarded to print(); ``flush`` defaults
            to True but an explicit value from the caller is honoured
    """
    if not cls.debug:
        return

    # Passing flush=True next to **kwargs raised "got multiple values for
    # keyword argument 'flush'" whenever a caller supplied its own flush.
    kwargs.setdefault("flush", True)
    print("-> ", "".join(map(str, args)), **kwargs)
85
+
86
def __init__(self, infile=None, debug=False):
    """Initialize the rosabeats object.

    Every attribute the other methods read is created here (as None or
    False) so that calling them in any order never fails with
    AttributeError.

    Args:
        infile (str, optional): Path to input audio file; when given it is
            passed to setfile()
        debug (bool, optional): Enable debug mode (stored on the class, so
            it applies to all instances)
    """
    rosabeats.debug = debug

    # audio data
    self.ffms_source = None
    self.data = None
    self.sr = None
    self.channels = None
    self.dtype = None
    self.mono = None

    # beat / bar / segment analysis results
    self.beat_timings = None
    self.beat_samples = None
    self.tempo = None
    self.beat_slices = None
    self.total_beats = None
    self.bars = None
    self.total_bars = None
    self.total_segments = None
    self.segments = None
    self.beatsperbar = None
    self.firstfullbar = None

    # playback / remix / beat-file outputs
    self.pulse_device = None
    self.stream = None
    self.remix = None
    self.remix_index = None
    self.remix_output_file = None
    self.beats_output_file = None
    self.beats_output = None
    self.output_play = False
    self.output_save = False
    self.output_beats = False

    # set by setfile()
    self.sourcefile = None
    self.saved_features = None

    # things get confusing when you are experimenting a lot and forgetting
    # that it's using old features/settings that are pickled away out of sight
    self.saved_features_enabled = False

    if infile is not None:
        self.setfile(infile)
128
+
129
def beat_starts_bar(self, beatnum):
    """Check if a beat number starts a new bar.

    Bars are counted from ``self.firstfullbar`` in steps of
    ``self.beatsperbar`` beats.

    Args:
        beatnum (int): Beat number to check

    Returns:
        int or None: Bar number if beat starts a bar, None otherwise
    """
    # divmod keeps the bar number an int; true division produced a float
    # (e.g. 1.0) even though an int is documented.
    bar, position = divmod(beatnum - self.firstfullbar, self.beatsperbar)
    return bar if position == 0 else None
142
+
143
def bar_containing_beat(self, beatnum):
    """Get the bar number and beat position within bar for a given beat number.

    Args:
        beatnum (int): Beat number to analyze

    Returns:
        tuple: (bar_number, beat_position_in_bar)

    Raises:
        Exception: If the beat number is outside ``0 .. total_beats - 1`` or
            the computed bar is outside ``0 .. total_bars - 1``
    """
    if beatnum > self.total_beats - 1 or beatnum < 0:
        raise Exception("%d is outside possible range" % beatnum)

    # NOTE(review): int() truncates toward zero, so beats that lie less than
    # one bar before firstfullbar are reported as bar 0 - confirm intended.
    bar = int((beatnum - self.firstfullbar) / self.beatsperbar)

    if bar > self.total_bars - 1 or bar < 0:
        # the message has three placeholders; supplying only two arguments
        # raised TypeError instead of this error
        raise Exception(
            "got %d in bar %d but bar %d shouldn't exist" % (beatnum, bar, bar)
        )

    rem = (beatnum - self.firstfullbar) % self.beatsperbar

    # returns the bar and the beat # in the bar
    return bar, rem
169
+
170
def set_remix_output_file(self, wavfile):
    """Remember the path that save_remix() writes the remix to.

    Args:
        wavfile (str): Destination path of the remix WAV file
    """
    self.remix_output_file = wavfile
177
+
178
def disable_output_beats(self):
    """Turn off writing of beat commands to the beats output file."""
    self.output_beats = False
181
+
182
def disable_output_save(self):
    """Turn off accumulation of played audio in the remix buffer."""
    self.output_save = False
185
+
186
def disable_output_play(self):
    """Turn off live playback through the audio output stream."""
    self.output_play = False
189
+
190
def enable_output_beats(self, beatsfile):
    """Turn on beat-command output and choose the file it goes to.

    Args:
        beatsfile (str): Path of the beats file to write
    """
    self.set_beats_output_file(beatsfile)
    self.output_beats = True
198
+
199
def enable_output_save(self, wavfile):
    """Turn on remix saving and choose the file the remix is written to.

    Args:
        wavfile (str): Destination path of the remix WAV file
    """
    self.set_remix_output_file(wavfile)
    self.output_save = True
207
+
208
def enable_output_play(self):
    """Turn on live playback through the audio output stream."""
    self.output_play = True
211
+
212
def reset_remix(self):
    """Replace the remix buffer with 30 minutes of silence and rewind it.

    Loads the source audio first when the sample rate is not known yet.
    """
    if self.sr is None:
        self.load()

    # let go of the previous buffer before the new one is allocated
    if self.remix is not None:
        del self.remix

    thirty_minutes = 30 * 60 * self.sr
    self.remix = np.zeros(shape=(self.channels, thirty_minutes), dtype=self.dtype)
    self.remix_index = 0
224
+
225
def extend_remix(self):
    """Append another 30 minutes of silence to the end of the remix buffer."""
    if self.sr is None:
        self.load()

    rosabeats.d_print()
    rosabeats.d_print("***********extending available space for remixed beats")
    rosabeats.d_print("***********len(remix[0]) before: %s" % len(self.remix[0]))

    # the buffer is laid out (channels, samples), so grow it along axis 1
    extra_samples = 30 * 60 * self.sr
    padding = np.zeros(shape=(self.channels, extra_samples), dtype=self.dtype)
    self.remix = np.concatenate((self.remix, padding), axis=1)

    rosabeats.d_print("***********len(remix[0]) after: %s" % len(self.remix[0]))
    rosabeats.d_print("******done extending available space for remixed beats")
242
+
243
def save_remix(self):
    """Trim the remix buffer with librosa and write it as 16-bit PCM.

    The result goes to ``self.remix_output_file`` at the source sample rate.
    """
    trimmed, _ = librosa.effects.trim(self.remix)
    # the buffer is (channels, samples); it is transposed for sf.write
    sf.write(self.remix_output_file, trimmed.T, self.sr, "PCM_16")
247
+
248
def setfile(self, infile):
    """Record the source audio file and derive the features-cache path.

    The cache path is a hidden ``.<stem>.pkl`` file in the same directory
    as the (absolute) source path.

    Args:
        infile (str): Path to input audio file
    """
    self.sourcefile = os.path.abspath(infile)
    folder, filename = os.path.split(self.sourcefile)
    stem = os.path.splitext(filename)[0]
    self.saved_features = os.path.join(folder, "." + stem + ".pkl")
259
+
260
def find_pulseaudio_device(self):
    """Make the device named "pulse" the sounddevice default, if present.

    The index of the first matching device is stored in
    ``self.pulse_device``.
    """
    device_names = [device["name"] for device in sd.query_devices()]
    for index, name in enumerate(device_names):
        if name == "pulse":
            self.pulse_device = index
            break

    if self.pulse_device is not None:
        sd.default.device = self.pulse_device
271
+
272
def setup_playback(self):
    """Configure sounddevice defaults from the loaded audio and open a stream.

    Loads the source audio first when the sample rate is not known yet.
    The started output stream is kept in ``self.stream``.
    """
    if self.sr is None:
        self.load()

    defaults = sd.default
    defaults.channels = self.channels
    defaults.samplerate = self.sr
    defaults.dtype = self.dtype

    self.find_pulseaudio_device()

    self.stream = sd.OutputStream()
    self.stream.start()
285
+
286
def init_outputs(self):
    """Start every output that is currently enabled.

    Playback, remix saving and beat-file writing are started in that order.
    """
    startup_steps = (
        (self.output_play, self.setup_playback),
        (self.output_save, self.reset_remix),
        (self.output_beats, self.start_writing_beats_output),
    )
    for enabled, start in startup_steps:
        if enabled:
            start()
294
+
295
def load_ffms(self):
    """Read the whole source file through FFMS2.

    Sets ``ffms_source``, ``data`` (transposed from what get_audio()
    returns), ``sr``, ``channels`` and ``dtype``.
    """
    source = ffms2.AudioSource(self.sourcefile)
    self.ffms_source = source

    # size the buffer for every sample so one get_audio() call reads it all
    source.init_buffer(count=source.properties.NumSamples)
    self.data = source.get_audio(start=0).T

    self.sr = source.properties.SampleRate
    self.channels = source.properties.Channels
    self.dtype = type(self.data[0][0])
303
+
304
def load_soundfile(self):
    """Load audio file using soundfile library.

    Samples are read as float32 and stored transposed in ``self.data``;
    ``sr``, ``channels`` and ``dtype`` are set to match.
    """
    samples, self.sr = sf.read(self.sourcefile, dtype="float32")
    self.data = samples.T

    # ndim only tells mono (1-D) from multi-channel (2-D); using it as the
    # channel count was wrong for anything with more than two channels.
    # After the transpose the first axis holds the channels.
    self.channels = 1 if self.data.ndim == 1 else self.data.shape[0]
    self.dtype = "float32"
310
+
311
def load_librosa(self):
    """Load audio file using librosa library.

    The file is read at its native sample rate without down-mixing, and
    ``sr``, ``channels`` and ``dtype`` are set to match.
    """
    self.data, self.sr = librosa.load(self.sourcefile, sr=None, mono=False)

    # ndim only tells mono (1-D) from multi-channel (2-D); for 2-D data the
    # channel count is the length of the first axis.
    self.channels = 1 if self.data.ndim == 1 else self.data.shape[0]

    # data[0][0] cannot be indexed when the array is 1-D (mono); the array's
    # own dtype gives the same scalar type for any shape.
    self.dtype = self.data.dtype.type
316
+
317
def load(self):
    """Load audio file using appropriate library based on file extension.

    ``.wav`` goes through librosa, ``.ogg`` through soundfile and anything
    else through FFMS2.  The extension is compared case-insensitively.
    After loading, the data is passed through ``librosa.effects.trim``.

    Raises:
        ImportError: If FFMS2 is required but not available
    """
    # lower-case so "SONG.WAV" is not mistaken for a format needing ffms2
    ext = os.path.splitext(self.sourcefile)[1].lower()
    if ext == ".wav":
        rosabeats.d_print("loading via librosa")
        self.load_librosa()
    elif ext == ".ogg":
        rosabeats.d_print("loading via soundfile")
        self.load_soundfile()
    else:
        if not FFMS2_AVAILABLE:
            raise ImportError("ffms2 is required for loading non-wav/ogg files. Please install ffms2.")
        rosabeats.d_print("loading via ffms")
        self.load_ffms()

    self.data, _ = librosa.effects.trim(self.data)
337
+
338
def mix_to_mono(self):
    """Store a mono mix-down of the audio in ``self.mono``.

    Loads the source audio first when no data is present yet.
    """
    if self.data is None:
        self.load()

    self.mono = librosa.to_mono(self.data)
344
+
345
def has_saved_features(self):
    """Tell whether a usable saved-features file is available.

    Returns:
        bool: True if saved features are enabled and the features file exists
    """
    if not self.saved_features_enabled:
        # hand back the flag itself, exactly as an ``and`` expression would
        return self.saved_features_enabled
    return os.path.isfile(self.saved_features)
352
+
353
def remove_features_file(self):
    """Delete the saved features file, if there is one."""
    if not os.path.isfile(self.saved_features):
        rosabeats.d_print("no features file found")
        return

    rosabeats.d_print("removing %s" % self.saved_features)
    os.unlink(self.saved_features)
360
+
361
def save_features(self):
    """Save extracted features to file.

    The beat, bar and segment attributes are collected into a dict and
    dumped with joblib to ``self.saved_features``.
    """
    rosabeats.d_print("saving features...")

    # total_bars and beat_samples are only assigned by track_beats() or
    # load_saved_features(); fall back to None so that saving after
    # segmenting alone does not fail with AttributeError.
    total_bars = getattr(self, "total_bars", None)
    beat_samples = getattr(self, "beat_samples", None)

    features = {
        "tempo": self.tempo,
        "beatsperbar": self.beatsperbar,
        "firstfullbar": self.firstfullbar,
        "total_beats": self.total_beats,
        "total_bars": total_bars if total_bars else None,
        "total_segments": self.total_segments,
        "beat_timings": self.beat_timings,
        "beat_samples": beat_samples,
        "beat_slices": self.beat_slices,
        "segments": self.segments,
    }

    # NOTE(review): this writes even when saved_features_enabled is False
    # (only loading checks the flag) - confirm that is intended.
    with open(self.saved_features, "wb") as f:
        joblib.dump(features, f)
379
+
380
def load_saved_features(self):
    """Restore beat, bar and segment attributes from the features file."""
    rosabeats.d_print("loading features...")

    # NOTE(review): joblib.load unpickles arbitrary objects; only use it on
    # feature files this program wrote itself.
    with open(self.saved_features, "rb") as handle:
        stored = joblib.load(handle)

    restored_names = (
        "tempo",
        "beatsperbar",
        "firstfullbar",
        "total_beats",
        "total_bars",
        "total_segments",
        "beat_timings",
        "beat_samples",
        "beat_slices",
        "segments",
    )
    for name in restored_names:
        setattr(self, name, stored[name])
397
+
398
def track_beats(self, beatsper=8, firstfull=0):
    """Detect beats with librosa and derive the bar layout.

    When a saved features file is available it is loaded instead and the
    arguments are not used.  Otherwise the results are stored on the
    instance and written out with save_features().

    Args:
        beatsper (int, optional): Number of beats per bar
        firstfull (int, optional): Index of first full bar
    """
    if self.has_saved_features():
        self.load_saved_features()
        return

    if self.mono is None:
        self.mix_to_mono()

    rosabeats.d_print("tracking beats...")
    tempo, timings = librosa.beat.beat_track(y=self.mono, sr=self.sr, units='time')
    self.tempo = tempo
    self.beat_timings = timings
    self.beat_samples = librosa.time_to_samples(timings, sr=self.sr)

    # each slice runs from one beat to the next, so there is one slice
    # fewer than there are beats
    self.beat_slices = list(zip(self.beat_samples, self.beat_samples[1:]))
    self.total_beats = len(timings)

    self.beatsperbar = beatsper
    self.firstfullbar = firstfull
    self.total_bars = int((self.total_beats - firstfull) / beatsper)

    self.save_features()
426
+
427
def segment(self, method="segmentino", redo=False, max_clusters=None):
    """Validate the arguments and run the chosen segmentation method.

    Args:
        method (str, optional): One of "laplacian", "segmentino" (default)
            or "backtrack"
        redo (bool, optional): Force re-segmentation even if segments exist
        max_clusters (int, optional): Largest cluster count to try; required
            for "laplacian" and rejected for the other methods

    Raises:
        ValueError: If invalid method is specified
        ImportError: If method="segmentino" but vamp is not available
        ValueError: If max_clusters is missing for laplacian segmentation
            or given for any other method
    """
    known_methods = ("laplacian", "segmentino", "backtrack")
    if method not in known_methods:
        raise ValueError("method must be either 'laplacian', 'segmentino' or 'backtrack'")

    if method == "segmentino" and not VAMP_AVAILABLE:
        raise ImportError("vamp is required for segmentino segmentation. Please install vamp.")

    wants_laplacian = method == "laplacian"
    if wants_laplacian and max_clusters is None:
        raise ValueError("max_clusters must be specified for laplacian segmentation")

    if not wants_laplacian and max_clusters is not None:
        raise ValueError("max_clusters should only be specified for laplacian segmentation")

    if wants_laplacian:
        self.segment_laplacian(redo, max_clusters)
    elif method == "backtrack":
        self.segment_backtrack(redo)
    else:
        self.segment_segmentino(redo)
458
+
459
def segment_backtrack(self, redo=False):
    """Segment audio using librosa onset detection and backtracking method.

    Each pair of consecutive onsets becomes one segment named
    ``segment<N>``.  Results are stored in ``self.segments`` and
    ``self.total_segments`` and written out with save_features().

    Args:
        redo (bool, optional): Force re-segmentation even if segments exist
    """
    if self.beat_timings is None:
        self.track_beats()

    if self.total_segments is not None and redo is False:
        rosabeats.d_print(
            "warning: you already have segment data and did not specify a redo"
        )
        return

    # track_beats() returns without mixing to mono when it restores saved
    # features, so make sure the mono signal exists
    if self.mono is None:
        self.mix_to_mono()

    # onset_detect reports frame indices; convert them to sample positions
    # before using them as boundaries or turning them into times
    onset_frames = librosa.onset.onset_detect(y=self.mono, sr=self.sr, backtrack=True)
    onset_samples = librosa.frames_to_samples(onset_frames)

    self.segments = []
    for count, (first_sample, next_onset) in enumerate(
        zip(onset_samples, onset_samples[1:])
    ):
        # A segment runs from its onset up to the sample before the next
        # onset (same convention as segment_laplacian).  The second value
        # is a position, not a length.
        segment_boundaries = (int(first_sample), int(next_onset) - 1)
        start, end = librosa.samples_to_time(segment_boundaries, sr=self.sr)

        self.segments.append(
            {
                "label": "segment" + str(count),
                "start": start,
                "duration": end - start,
                "samples": segment_boundaries,
                "beats": [],
                "bars": [],
            }
        )

    self.total_segments = len(self.segments)
    self.save_features()
501
+
502
def segment_laplacian(self, redo=False, max_clusters=48):
    """Segment audio using Laplacian segmentation method.

    Beat-synchronous CQT and MFCC features are combined into a graph whose
    Laplacian eigenvectors are clustered with k-means.  Cluster counts from
    ``max_clusters`` down to 3 are scored and the best one is used.  Every
    run of equal labels becomes one segment; the segments cover the audio
    from sample 0 up to the last CQT frame.

    Args:
        redo (bool, optional): Force re-segmentation even if segments exist
        max_clusters (int, optional): Maximum number of clusters to use

    Raises:
        ValueError: If no cluster count between 3 and max_clusters could be
            scored (for example because there are too few beats)
    """
    # ``import scipy`` / ``import sklearn`` alone do not guarantee these
    # submodules are loaded, so import the ones used below explicitly.
    import scipy.linalg
    import scipy.ndimage
    import scipy.sparse.csgraph
    import sklearn.cluster
    import sklearn.metrics

    if self.beat_timings is None:
        self.track_beats()

    if self.total_segments is not None and redo is False:
        rosabeats.d_print(
            "warning: you already have segment data and did not specify a redo"
        )
        return

    # track_beats() returns without mixing to mono when it restores saved
    # features, so make sure the mono signal exists
    if self.mono is None:
        self.mix_to_mono()

    rosabeats.d_print("segmenting song...")

    beat_frames = librosa.time_to_frames(self.beat_timings, sr=self.sr)

    bins_per_octave = 12 * 3
    n_octaves = 7

    cqt = librosa.cqt(
        y=self.mono,
        sr=self.sr,
        bins_per_octave=bins_per_octave,
        n_bins=n_octaves * bins_per_octave,
    )
    C = librosa.amplitude_to_db(np.abs(cqt), ref=np.max)

    # beat-synchronous features
    Csync = librosa.util.sync(C, beat_frames, aggregate=np.median)

    # librosa.util.sync pads the frame list so the intervals start at frame
    # 0 (see the librosa docs); column i of the synced features therefore
    # starts at interval_starts[i], not at beat_frames[i].
    interval_starts = librosa.util.fix_frames(beat_frames, x_min=0)

    # recurrence graph from the CQT, smoothed along the time-lag axis
    R = librosa.segment.recurrence_matrix(Csync, width=3, mode='affinity',
                                          sym=True)
    df = librosa.segment.timelag_filter(scipy.ndimage.median_filter)
    Rf = df(R, size=(1, 7))

    # sequence graph from MFCC distances between neighbouring intervals
    mfcc = librosa.feature.mfcc(y=self.mono, sr=self.sr)
    Msync = librosa.util.sync(mfcc, beat_frames)

    path_distance = np.sum(np.diff(Msync, axis=1)**2, axis=0)
    sigma = np.median(path_distance)
    path_sim = np.exp(-path_distance / sigma)

    R_path = np.diag(path_sim, k=1) + np.diag(path_sim, k=-1)

    # balance the two graphs and take the normalized Laplacian
    deg_path = np.sum(R_path, axis=1)
    deg_rec = np.sum(Rf, axis=1)

    mu = deg_path.dot(deg_path + deg_rec) / np.sum((deg_path + deg_rec)**2)

    A = mu * Rf + (1 - mu) * R_path
    L = scipy.sparse.csgraph.laplacian(A, normed=True)
    _, evecs = scipy.linalg.eigh(L)

    evecs = scipy.ndimage.median_filter(evecs, size=(9, 1))

    Cnorm = np.cumsum(evecs**2, axis=1)**0.5

    best_cluster_size = 0
    best_cluster_score = 0

    # we need at least 3 clusters for any song; never ask for as many
    # clusters as there are intervals, which k-means / the silhouette score
    # cannot handle
    upper = min(max_clusters, evecs.shape[0] - 1)

    for n_clusters in range(upper, 2, -1):
        rosabeats.d_print("Testing a cluster value of %d..." % n_clusters)

        # compute a matrix of the Eigen-vectors / their normalized values
        X = evecs[:, :n_clusters] / Cnorm[:, n_clusters-1:n_clusters]

        # create the candidate clusters and fit them
        clusterer = sklearn.cluster.KMeans(n_clusters=n_clusters, max_iter=300,
                                           random_state=0, n_init=20)
        cluster_labels = clusterer.fit_predict(X)

        silhouette_avg = sklearn.metrics.silhouette_score(X, cluster_labels)

        labels = cluster_labels.tolist()
        clusters = max(labels) + 1

        # count the runs of equal labels and note the length of each run
        # (the final run is counted but its length is not recorded)
        segment_count = 0.0
        segment_length = 0
        segment_lengths = []
        previous_label = -1

        for label in labels:
            if label != previous_label:
                previous_label = label
                segment_count += 1.0

                if segment_length > 0:
                    segment_lengths.append(segment_length)

                segment_length = 1
            else:
                segment_length += 1

        ratio = float(segment_count) / float(clusters)

        # with a single run nothing was recorded; use that run's length
        # instead of calling min() on an empty list
        min_segment_len = min(segment_lengths) if segment_lengths else segment_length

        # penalize clusterings that produce one-interval "orphan" runs
        orphan_scaler = .8 if min_segment_len == 1 else 1

        cluster_score = n_clusters * silhouette_avg * ratio * orphan_scaler

        if cluster_score >= best_cluster_score:
            best_cluster_score = cluster_score
            best_cluster_size = n_clusters

    if best_cluster_size == 0:
        raise ValueError(
            "could not find a usable cluster count for laplacian segmentation"
        )

    k = best_cluster_size

    rosabeats.d_print("using best cluster size %d" % k)

    X = evecs[:, :k] / Cnorm[:, k-1:k]
    seg_ids = sklearn.cluster.KMeans(n_clusters=k, max_iter=1000,
                                     random_state=0, n_init=1000).fit_predict(X)

    # indices of the intervals where the label changes, plus interval 0
    bound_beats = 1 + np.flatnonzero(seg_ids[:-1] != seg_ids[1:])
    bound_beats = librosa.util.fix_frames(bound_beats, x_min=0)

    # label of the segment that starts at each boundary
    bound_segs = list(seg_ids[bound_beats])

    # frame where each segment starts, with the last frame appended so that
    # the final segment has an end
    bound_frames = interval_starts[bound_beats]
    bound_frames = librosa.util.fix_frames(bound_frames,
                                           x_min=None,
                                           x_max=C.shape[1]-1)

    bound_samples = librosa.frames_to_samples(bound_frames)

    # Pair every segment start with the following boundary.  Zipping the
    # boundaries directly with the labels shifted each label onto the
    # previous interval and dropped the final segment.
    self.segments = []
    for first_sample, next_start, label in zip(
        bound_samples, bound_samples[1:], bound_segs
    ):
        segment_boundaries = (first_sample, next_start - 1)
        start, end = librosa.samples_to_time(segment_boundaries, sr=self.sr)

        self.segments.append(
            {
                'label': label,
                'start': start,
                'duration': end - start,
                'samples': segment_boundaries,
                'beats': [],
                'bars': [],
            }
        )

    self.total_segments = len(self.segments)
    self.save_features()
666
+
667
+ ##TODO## segment_laplacian needs to add any unsegmented part of the song as a last segment
668
+ ##TODO## for example, using max clusters of 10 with example audio, we get 10 segments, but ending with beat 254 (there are 308)
669
+ ##TODO## even if max clusters is 48, it only gives us segments including up to beat 303
670
+
671
def segment_segmentino(self, redo=False):
    """Segment the audio with the Segmentino vamp plugin.

    Every entry of the plugin's "list" output becomes one segment with its
    label, start time, duration and sample boundaries.  Results are stored
    in ``self.segments`` / ``self.total_segments`` and written out with
    save_features().

    Args:
        redo (bool, optional): Force re-segmentation even if segments exist

    Raises:
        RuntimeError: If the plugin call fails or returns no "list" data
    """
    if self.data is None:
        self.load()

    if self.total_segments is not None and redo is False:
        rosabeats.d_print(
            "warning: you already have segment data and did not specify a redo"
        )
        return

    rosabeats.d_print("segmenting song...")
    try:
        segmented = vamp.collect(self.data, self.sr, "segmentino:segmentino")
    except Exception as e:
        rosabeats.d_print(f"Error loading segmentino plugin: {str(e)}")
        raise RuntimeError(f"Failed to run segmentino segmentation: {str(e)}") from e

    if not segmented or "list" not in segmented:
        rosabeats.d_print("Segmentino plugin returned invalid data")
        raise RuntimeError("Segmentino plugin failed to return valid segment data")

    results = segmented["list"]
    self.total_segments = len(results)
    self.segments = [None] * self.total_segments

    for position, entry in enumerate(results):
        name = entry["label"]
        begin = float(entry["timestamp"])
        length = float(entry["duration"])

        self.segments[position] = {
            "label": name,
            "start": begin,
            "duration": length,
            "samples": librosa.time_to_samples((begin, begin + length), sr=self.sr),
            "beats": [],
            "bars": [],
        }

    self.save_features()
720
+
721
def segmentize_beats(self):
    """Associate beats and bars with segments.

    A beat belongs to a segment when its first sample lies within the
    segment's sample boundaries (inclusive).  A bar belongs to a segment
    when both its first and its last beat start inside the segment.  The
    result is written out with save_features().

    Raises:
        Exception: If segments or beat timings are not available
    """
    if self.segments is None or self.beat_timings is None:
        raise Exception("must segment() and track beats before segmentizing beats")

    rosabeats.d_print("segmentizing beats/bars...")

    for idx, seg in enumerate(self.segments):
        rosabeats.d_print("segmentizing beats for segment %d" % idx)

        seg_first = seg["samples"][0]
        seg_last = seg["samples"][1]

        # Start from a clean slate: segments restored from a features file
        # (or a second call) already carry beats/bars, and appending again
        # would duplicate them.
        seg["beats"] = []
        seg["bars"] = []

        # for each beat in the song that has a slice...
        for beat_num in range(self.total_beats - 1):
            # sample where the beat starts
            beat_first = self.beat_slices[beat_num][0]

            if not seg_first <= beat_first <= seg_last:
                continue

            # the beat starts within the segment
            seg["beats"].append(beat_num)

            # now see if this beat starts a bar
            bar_num = self.beat_starts_bar(beat_num)
            if bar_num is None:
                continue

            # beat number of the last beat in the bar
            # (i.e. 0 + (8-1) = 7, so beats 0-7)
            beat_num_final = int(beat_num + (self.beatsperbar - 1))

            # sample where the final beat of the bar starts
            try:
                beat_final_first = self.beat_slices[beat_num_final][0]
            except IndexError:
                rosabeats.d_print(
                    "warning: beat %d does not exist" % beat_num_final
                )
                continue

            # the bar counts only if its last beat also starts in the segment
            if seg_first <= beat_final_first <= seg_last:
                seg["bars"].append(int(bar_num))

    self.save_features()
788
+
789
def divide_bars(self):
    """Do nothing except emit a debug warning; kept for old callers."""
    rosabeats.d_print("warning: divide_bars() no longer does anything")
792
+
793
def set_beats_output_file(self, beatsfile):
    """Remember the path that beat commands are written to.

    Args:
        beatsfile (str): Path of the beats file to write
    """
    self.beats_output_file = beatsfile
800
+
801
def set_default_beats_output_file(self):
    """Use ``<source stem>_beats.br`` (no directory part) as the beats file."""
    filename = os.path.basename(self.sourcefile)
    stem = os.path.splitext(filename)[0]
    self.set_beats_output_file(stem + "_beats.br")
806
+
807
def start_writing_beats_output(self):
    """Initialize beat output file and write header information.

    Falls back to the default beats file name when none was set.  The
    header consists of a ``file`` line with the source path and a
    ``beats_bar`` line with beats-per-bar and first-full-bar.

    Raises:
        TypeError: If beatsperbar / firstfullbar are not numbers yet (beats
            have not been tracked); no file is opened in that case
    """
    if self.beats_output_file is None:
        self.set_default_beats_output_file()

    # Format the header before touching the file system, so a formatting
    # failure neither leaks an open handle nor leaves an empty file behind.
    header = "file %s\n" % self.sourcefile
    header += "beats_bar %d %d\n" % (self.beatsperbar, self.firstfullbar)

    self.beats_output = open(self.beats_output_file, "w")
    self.beats_output.write(header)
817
+
818
def shutdown(self):
    """Clean up and close all output streams.

    Closes the playback stream, saves the remix and closes the beats file
    for every output that is enabled.  An output that was enabled but never
    started (its stream, buffer or file handle is still None) is skipped
    instead of raising.
    """
    if self.output_play and self.stream is not None:
        self.stream.close()
    if self.output_save and self.remix is not None:
        self.save_remix()
    if self.output_beats and self.beats_output is not None:
        self.beats_output.close()
826
+
827
def write_out(self, text):
    """Append one line to the beats output file, opening it on first use.

    Args:
        text (str): Line to write (a newline is added)
    """
    if self.beats_output is None:
        self.start_writing_beats_output()

    line = "%s\n" % text
    self.beats_output.write(line)
837
+
838
def play_beat(self, b, silent=False, divisor=1):
    """Play a single beat.

    Depending on the enabled outputs the beat's samples are written to the
    playback stream, added to the remix buffer (which is grown when it is
    too small) and/or logged to the beats file.  The first
    ``self.channels`` channels of the audio are used, so mono and
    multi-channel data work as well as stereo.

    Args:
        b (int): Beat number to play
        silent (bool, optional): Suppress console output
        divisor (int, optional): Beat division factor; with a value above 1
            only the first 1/divisor of the beat is used

    Raises:
        Exception: If beat tracking has not been performed
    """
    if self.beat_slices is None:
        raise Exception("must track beats before playing beats")

    try:
        first, last = self.beat_slices[b]
    except (IndexError, TypeError):
        if not silent:
            print("*NOB* ", end="", flush=True)
            print(flush=True)
            print("error: beat %d does not exist" % b)
        return

    if divisor > 1:
        last = first + int((last - first) / divisor)

    if not silent:
        label = "%d" % b
        if divisor > 1:
            # stays on the same line as the beat number
            label += "/%d" % divisor
        print(label + " ", end="", flush=True)

    # view the audio as (channels, samples) even when it is 1-D mono
    chunk = np.atleast_2d(self.data)[: self.channels, first:last]
    n_samples = chunk.shape[1]

    if self.output_play:
        # the stream is written with one column per channel
        self.stream.write(np.ascontiguousarray(chunk.T))

    if self.output_save:
        end = self.remix_index + n_samples
        # grow the remix buffer until the beat fits
        while end > self.remix.shape[1]:
            self.extend_remix()
        self.remix[:, self.remix_index:end] += chunk
        self.remix_index = end

    if self.output_beats:
        if divisor > 1:
            self.write_out("beat_div %d %d 1" % (b, divisor))
        else:
            self.write_out("beats %d" % b)
909
+
910
def play_beats(self, beats):
    """Play the given beats one after another, then end the console line.

    Args:
        beats (list): Beat numbers to play, in order
    """
    for beat_number in beats:
        self.play_beat(beat_number)
    print(flush=True)
919
+
920
def play_bars(self, bars, reverse=False):
    """Play the given bars one after another.

    Args:
        bars (list): Bar numbers to play, in order
        reverse (bool, optional): Passed on to play_bar() for every bar
    """
    for bar_number in bars:
        self.play_bar(bar_number, reverse=reverse)
929
+
930
def rest(self, beats):
    """Add silence for specified number of beats.

    The length of one beat is derived from ``self.tempo``.  Silence is
    written to the playback stream, skipped over in the remix buffer and
    logged to the beats file, depending on which outputs are enabled.

    Args:
        beats (float): Number of beats to rest
    """
    sec_per_beat = float(1 / (self.tempo / 60))
    sec_of_silence = sec_per_beat * beats
    samples_of_silence = int(sec_of_silence * self.sr)
    rosabeats.d_print(
        "resting %02g sec (%02g beats at %02g seconds per beat)"
        % (sec_of_silence, beats, sec_per_beat)
    )

    if self.output_play:
        self.stream.write(
            np.zeros(shape=(samples_of_silence, self.channels), dtype=self.dtype)
        )

    if self.output_save:
        # Adding zeros would not change the buffer, so only the write
        # position moves.  Grow the buffer first when the rest runs past
        # its end; the old slice-add raised ValueError in that case.
        end = self.remix_index + samples_of_silence
        while end > self.remix.shape[1]:
            self.extend_remix()
        self.remix_index = end

    if self.output_beats:
        self.write_out("rest %g" % beats)
958
+
959
def play_bar(self, m, reverse=False, silent=False):
    """Play a single bar.

    The bar covers ``beatsperbar`` beats starting at
    ``m * beatsperbar + firstfullbar``; it is cut short when it would run
    past the last beat.

    Args:
        m (int): Bar number to play
        reverse (bool, optional): Play bar in reverse order
        silent (bool, optional): Suppress console output, including that of
            the individual beats

    Raises:
        Exception: If beat tracking has not been performed
    """
    if self.beatsperbar is None or self.beat_slices is None:
        raise Exception("must track beats before you can play bar")

    if self.output_beats:
        self.write_out("# bar %d" % m)

    if not silent:
        print("[%d]" % m, end="", flush=True)

    first_beat = int(m * self.beatsperbar) + self.firstfullbar
    last_beat = int(first_beat + self.beatsperbar) - 1
    # clamp to the last beat that exists
    last_beat = min(last_beat, int(self.total_beats) - 1)

    beats = list(range(first_beat, last_beat + 1))
    if reverse:
        if not silent:
            print("[rev] ", end="", flush=True)
        beats.reverse()

    for beat in beats:
        # mark the bar's first beat on the console
        if beat == first_beat and not silent:
            print("*", end="", flush=True)
        # forward the flag: without it the beats kept printing even when
        # the caller asked for silence
        self.play_beat(beat, silent=silent)

    if not silent:
        print(flush=True)