muze 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -1
  3. data/README.md +5 -0
  4. data/Rakefile +3 -0
  5. data/ext/muze/muze_ext.c +129 -12
  6. data/lib/muze/beat/beat_track.rb +93 -11
  7. data/lib/muze/core/audio.rb +129 -0
  8. data/lib/muze/core/cache.rb +38 -0
  9. data/lib/muze/core/dct.rb +24 -21
  10. data/lib/muze/core/frames.rb +31 -0
  11. data/lib/muze/core/matrix.rb +23 -0
  12. data/lib/muze/core/resample.rb +111 -19
  13. data/lib/muze/core/stft.rb +312 -52
  14. data/lib/muze/core/windows.rb +113 -17
  15. data/lib/muze/display/specshow.rb +307 -41
  16. data/lib/muze/effects/harmonic_percussive.rb +83 -18
  17. data/lib/muze/effects/streaming.rb +101 -0
  18. data/lib/muze/effects/time_stretch.rb +353 -36
  19. data/lib/muze/feature/aggregation.rb +49 -0
  20. data/lib/muze/feature/chroma.rb +43 -15
  21. data/lib/muze/feature/context.rb +81 -0
  22. data/lib/muze/feature/mfcc.rb +78 -38
  23. data/lib/muze/feature/spectral.rb +258 -39
  24. data/lib/muze/filters/chroma_filter.rb +21 -2
  25. data/lib/muze/filters/mel.rb +47 -1
  26. data/lib/muze/io/audio_loader/ffmpeg_backend.rb +179 -15
  27. data/lib/muze/io/audio_loader/wavify_backend.rb +118 -11
  28. data/lib/muze/io/audio_loader.rb +178 -48
  29. data/lib/muze/io/audio_writer.rb +48 -0
  30. data/lib/muze/native.rb +91 -8
  31. data/lib/muze/onset/onset_detect.rb +114 -23
  32. data/lib/muze/version.rb +1 -1
  33. data/lib/muze.rb +237 -60
  34. metadata +11 -21
  35. data/benchmarks/baseline.json +0 -24
  36. data/benchmarks/native_vs_ruby.rb +0 -23
  37. data/benchmarks/quality_metrics.rb +0 -265
  38. data/benchmarks/quality_thresholds.md +0 -28
  39. data/benchmarks/support/fixture_library.rb +0 -107
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 852274e098d5ea8d512680e8dbcf9962b8e380ce778c51912d5efa77cf5d4427
4
- data.tar.gz: 4e49c534ea90ca6d01c1a3e37ca88e975db1a404a59b37288f8397642b71a7a5
3
+ metadata.gz: 01b14e94bc84cd5ca2437c37404f0344a084af15f185465687776997c135442a
4
+ data.tar.gz: '09d93556256984b96e283bcd3be6bd5c6d404130056f6822b9bf42757a43c394'
5
5
  SHA512:
6
- metadata.gz: 874a8d2467e25523dd91361a41b299b7c8e9dfec8bd22def1f382795026ce28b7aa086f19eb8ffc7616c25f90e60471c909c2a777c3f4bc8fced6ba1fbf8c65a
7
- data.tar.gz: c4d63ca9bd4a35ff74e34eec358a03e3c3c5dd15dd03589debbb00c97c02d1719849ca883149afd984f76065bed8377678af88e2297832eaddab43d8dad3b970
6
+ metadata.gz: 451d64a3392a7baa148808ccee159a03c2d20234b0bc39db5bdf92535f399467fed6eb8360498148a925c652f0d43afb6e05cc070aa7548a2e71373490e727cb
7
+ data.tar.gz: 2743e731b94df7d830f4deb497099d211828ad41d85436a5afcb4e25864bec32865040db402763fa10a413004c574a5c5b7a942cc7f54091bb5c0dd6ce8ced4b
data/CHANGELOG.md CHANGED
@@ -2,6 +2,23 @@
2
2
 
3
3
  All notable changes to this project are documented in this file.
4
4
 
5
+ ## [1.0.0] - 2026-05-25
6
+
7
+ ### Added
8
+
9
+ - Added broader audio I/O: WAV writing, source-rate preserving loads, `Pathname` and WAV `IO` inputs, stereo output, configurable downmixing, file metadata via `Muze.info`, and chunked reads via `Muze.load_stream`.
10
+ - Added streamed and long-file-friendly DSP APIs including `Muze.stft_stream`, `Muze.time_stretch_stream`, `Muze.pitch_shift_stream`, and `Muze.hpss_stream`.
11
+ - Added more STFT and conversion controls: explicit pad modes, trailing frame padding, periodic windows, custom windows, non-power-of-two even FFT lengths, frame/time/sample helpers, FFT frequencies, and configurable dB conversion.
12
+ - Added richer feature extraction: Slaney mel filters, mel frequencies, chroma tuning and octave weighting, MFCC liftering and input modes, delta edge modes, additional spectral descriptors, polynomial spectral features, tonnetz, beat-synchronous aggregation, and shared feature extraction via `Muze.feature_context` / `Muze.feature_stack`.
13
+ - Added rhythm controls for onset strength, onset detection, tempo estimation, normalized tempograms, fixed-BPM beat tracking, tempo frequency helpers, and beat metadata output.
14
+ - Added effects improvements: multi-channel processing, HPSS masks and margins, WSOLA/OLA time stretching, phase locking, pitch-shift controls, frame-energy trimming with interval units, and preemphasis/deemphasis.
15
+ - Added lightweight visualization upgrades: `specshow` axes, dimensions, color maps, value bounds, fragment output, image rendering, waveform envelope rendering, stereo waveform layouts, and onset envelope rendering.
16
+
17
+ ### Changed
18
+
19
+ - The optional native extension is now built during gem installation when supported, while retaining the pure Ruby fallback.
20
+ - Audio loading now applies FFmpeg seek/duration controls before decoding and uses safer FFmpeg process handling.
21
+
5
22
  ## [0.1.0] - 2026-03-07
6
23
 
7
- - Initial release.
24
+ - Initial release.
data/README.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Muze
2
2
 
3
+ [![Gem Version](https://img.shields.io/gem/v/muze.svg)](https://rubygems.org/gems/muze)
4
+ [![CI](https://github.com/ydah/muze/actions/workflows/ci.yml/badge.svg)](https://github.com/ydah/muze/actions/workflows/ci.yml)
5
+ [![Ruby](https://img.shields.io/badge/ruby-%3E%3D%203.1-red.svg)](https://www.ruby-lang.org/)
6
+ [![License](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE.txt)
7
+
3
8
  Muze is a Ruby audio feature extraction library that provides a full pipeline from audio loading to spectral analysis, feature extraction, rhythm analysis, effects, and lightweight visualization.
4
9
 
5
10
  ![Image](https://github.com/user-attachments/assets/1e88395b-c715-4c9a-b458-d1cb9fceb848)
data/Rakefile CHANGED
@@ -7,6 +7,9 @@ require_relative "benchmarks/quality_metrics"
7
7
 
8
8
  RSpec::Core::RakeTask.new(:spec)
9
9
 
10
+ CLEAN.include("ext/muze/*.o", "ext/muze/mkmf.log")
11
+ CLOBBER.include("ext/muze/Makefile", "ext/muze/*.bundle", "ext/muze/*.dSYM")
12
+
10
13
  directory "ext/muze"
11
14
 
12
15
  desc "Compile optional C extension"
data/ext/muze/muze_ext.c CHANGED
@@ -1,16 +1,30 @@
1
1
  #include "ruby.h"
2
+ #include "ruby/thread.h"
3
+ #include <string.h>
2
4
 
3
5
  static VALUE mMuze;
4
6
  static VALUE mNative;
5
7
 
8
+ static VALUE muze_parameter_error(void) {
9
+ ID id = rb_intern("ParameterError");
10
+ if (rb_const_defined(mMuze, id)) {
11
+ return rb_const_get(mMuze, id);
12
+ }
13
+
14
+ return rb_eArgError;
15
+ }
16
+
6
17
  static VALUE native_frame_slices(VALUE self, VALUE rb_signal, VALUE rb_frame_length, VALUE rb_hop_length) {
7
- Check_Type(rb_signal, T_ARRAY);
18
+ if (!RB_TYPE_P(rb_signal, T_ARRAY)) {
19
+ rb_raise(muze_parameter_error(), "signal must be an Array");
20
+ }
21
+
8
22
  const long signal_length = RARRAY_LEN(rb_signal);
9
23
  const long frame_length = NUM2LONG(rb_frame_length);
10
24
  const long hop_length = NUM2LONG(rb_hop_length);
11
25
 
12
26
  if (frame_length <= 0 || hop_length <= 0) {
13
- rb_raise(rb_eArgError, "frame_length and hop_length must be positive");
27
+ rb_raise(muze_parameter_error(), "frame_length and hop_length must be positive");
14
28
  }
15
29
 
16
30
  if (signal_length <= frame_length) {
@@ -41,16 +55,66 @@ static VALUE native_frame_slices(VALUE self, VALUE rb_signal, VALUE rb_frame_len
41
55
  return frames;
42
56
  }
43
57
 
44
- static int cmp_double(const void *a, const void *b) {
45
- const double left = *(const double *)a;
46
- const double right = *(const double *)b;
47
- if (left < right) return -1;
48
- if (left > right) return 1;
49
- return 0;
58
/* Exchanges the elements stored at positions `left` and `right` of `values`. */
static void swap_double(double *values, long left, long right) {
  const double held = values[right];
  values[right] = values[left];
  values[left] = held;
}
63
+
64
/*
 * Lomuto-style partition of values[left..right] around values[pivot_index].
 * Elements strictly smaller than the pivot end up before the returned index,
 * and the pivot itself is placed at the returned index.
 */
static long partition_double(double *values, long left, long right, long pivot_index) {
  const double pivot_value = values[pivot_index];
  long boundary = left;
  long cursor = left;

  /* Park the pivot at the right edge while the range is scanned. */
  swap_double(values, pivot_index, right);

  while (cursor < right) {
    if (values[cursor] < pivot_value) {
      swap_double(values, boundary, cursor);
      boundary += 1;
    }
    cursor += 1;
  }

  /* Move the pivot into its final position. */
  swap_double(values, right, boundary);
  return boundary;
}
80
+
81
/*
 * Returns the element that would sit at index `target` if values[0..count-1]
 * were sorted ascending. Reorders `values` in place. The median-of-range
 * position (midpoint of the current bounds) is used as the pivot.
 */
static double quickselect_double(double *values, long count, long target) {
  long low = 0;
  long high = count - 1;

  for (;;) {
    if (low == high) return values[low];

    const long split = partition_double(values, low, high, low + ((high - low) / 2));
    if (target == split) return values[target];

    if (target < split) {
      high = split - 1;
    } else {
      low = split + 1;
    }
  }
}
99
+
100
+ struct median_args {
101
+ double *values;
102
+ long count;
103
+ long target;
104
+ double result;
105
+ };
106
+
107
+ static void *median_without_gvl(void *ptr) {
108
+ struct median_args *args = (struct median_args *)ptr;
109
+ args->result = quickselect_double(args->values, args->count, args->target);
110
+ return NULL;
50
111
  }
51
112
 
52
113
  static VALUE native_median1d(VALUE self, VALUE rb_values) {
53
- Check_Type(rb_values, T_ARRAY);
114
+ if (!RB_TYPE_P(rb_values, T_ARRAY)) {
115
+ rb_raise(muze_parameter_error(), "values must be an Array");
116
+ }
117
+
54
118
  const long count = RARRAY_LEN(rb_values);
55
119
  if (count == 0) return DBL2NUM(0.0);
56
120
 
@@ -60,10 +124,62 @@ static VALUE native_median1d(VALUE self, VALUE rb_values) {
60
124
  values[i] = NUM2DBL(rb_ary_entry(rb_values, i));
61
125
  }
62
126
 
63
- qsort(values, count, sizeof(double), cmp_double);
64
- const double median = values[count / 2];
127
+ struct median_args args = { values, count, count / 2, 0.0 };
128
+ rb_thread_call_without_gvl(median_without_gvl, &args, NULL, NULL);
65
129
  xfree(values);
66
- return DBL2NUM(median);
130
+ return DBL2NUM(args.result);
131
+ }
132
+
133
/*
 * Inserts `value` into the ascending array `values` (currently *length
 * elements), after any existing elements that compare <= value, and
 * increments *length. The caller must provide room for one more element.
 */
static void insert_sorted_double(double *values, long *length, double value) {
  const long used = *length;
  long slot;

  for (slot = 0; slot < used && values[slot] <= value; slot++) {
  }

  /* Shift the tail one place to the right to open the slot. */
  memmove(values + slot + 1, values + slot, sizeof(double) * (used - slot));
  values[slot] = value;
  *length = used + 1;
}
140
+
141
/*
 * Removes one occurrence of `value` from the ascending array `values`
 * (currently *length elements) and decrements *length. Leaves the array
 * untouched when no element compares equal to `value`.
 */
static void remove_sorted_double(double *values, long *length, double value) {
  const long used = *length;
  long slot = 0;

  /* Skip everything smaller, then keep scanning until an equal element. */
  for (; slot < used && values[slot] < value; slot++) {
  }
  for (; slot < used && values[slot] != value; slot++) {
  }
  if (slot >= used) return;

  /* Close the gap left by the removed element. */
  memmove(values + slot, values + slot + 1, sizeof(double) * (used - slot - 1));
  *length = used - 1;
}
150
+
151
+ static VALUE native_median_filter1d(VALUE self, VALUE rb_values, VALUE rb_half) {
152
+ if (!RB_TYPE_P(rb_values, T_ARRAY)) {
153
+ rb_raise(muze_parameter_error(), "values must be an Array");
154
+ }
155
+
156
+ const long count = RARRAY_LEN(rb_values);
157
+ const long half = NUM2LONG(rb_half);
158
+ if (half < 0) {
159
+ rb_raise(muze_parameter_error(), "half must be non-negative");
160
+ }
161
+
162
+ VALUE output = rb_ary_new2(count);
163
+ if (count == 0) return output;
164
+
165
+ double *window = ALLOC_N(double, count);
166
+ long window_length = 0;
167
+
168
+ for (long index = 0; index < count; index++) {
169
+ if (index > half) {
170
+ remove_sorted_double(window, &window_length, NUM2DBL(rb_ary_entry(rb_values, index - half - 1)));
171
+ }
172
+
173
+ const long entering = index + half;
174
+ if (entering < count) {
175
+ insert_sorted_double(window, &window_length, NUM2DBL(rb_ary_entry(rb_values, entering)));
176
+ }
177
+
178
+ rb_ary_push(output, DBL2NUM(window_length == 0 ? 0.0 : window[window_length / 2]));
179
+ }
180
+
181
+ xfree(window);
182
+ return output;
67
183
  }
68
184
 
69
185
  void Init_muze_ext(void) {
@@ -72,4 +188,5 @@ void Init_muze_ext(void) {
72
188
 
73
189
  rb_define_singleton_method(mNative, "frame_slices", native_frame_slices, 3);
74
190
  rb_define_singleton_method(mNative, "median1d", native_median1d, 1);
191
+ rb_define_singleton_method(mNative, "median_filter1d", native_median_filter1d, 2);
75
192
  }
@@ -12,15 +12,36 @@ module Muze
12
12
  # @param start_bpm [Float]
13
13
  # @param tightness [Integer]
14
14
  # @return [Array(Float, Array<Integer>)] estimated tempo and beat frames
15
- def beat_track(y: nil, sr: 22_050, onset_envelope: nil, hop_length: 512, start_bpm: 120.0, tightness: 100)
15
+ def beat_track(y: nil, sr: 22_050, onset_envelope: nil, hop_length: 512, start_bpm: 120.0, tightness: 100, min_bpm: 30.0, max_bpm: 240.0, bpm: nil, fill_missing: true, return_metadata: false)
16
+ validate_beat_params!(
17
+ sr:,
18
+ hop_length:,
19
+ start_bpm:,
20
+ tightness:,
21
+ min_bpm:,
22
+ max_bpm:,
23
+ bpm:,
24
+ fill_missing:,
25
+ return_metadata:
26
+ )
27
+
16
28
  envelope = if onset_envelope
17
29
  onset_envelope.is_a?(Numo::NArray) ? onset_envelope.to_a : Array(onset_envelope)
18
30
  else
19
31
  Muze::Onset.onset_strength(y:, sr:, hop_length:).to_a
20
32
  end
33
+ validate_finite_array!(envelope, "onset_envelope")
34
+
35
+ if envelope.empty? || envelope.max.to_f <= 1.0e-12
36
+ result = { tempo: nil, beats: [], confidence: 0.0 }
37
+ return return_metadata ? result : [nil, []]
38
+ end
39
+
40
+ tempo = bpm || estimate_tempo(envelope, sr:, hop_length:, start_bpm:, min_bpm:, max_bpm:, tightness:)
41
+ beats = track_beats(envelope, tempo:, sr:, hop_length:, tightness:, fill_missing:)
42
+ confidence = beat_confidence(envelope, beats)
43
+ return { tempo:, beats:, confidence: } if return_metadata
21
44
 
22
- tempo = estimate_tempo(envelope, sr:, hop_length:, start_bpm:)
23
- beats = track_beats(envelope, tempo:, sr:, hop_length:, tightness:)
24
45
  [tempo, beats]
25
46
  end
26
47
 
@@ -30,25 +51,40 @@ module Muze
30
51
  # @param hop_length [Integer]
31
52
  # @param win_length [Integer]
32
53
  # @return [Numo::SFloat]
33
- def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384)
34
- Muze::Feature.tempogram(y:, onset_envelope:, sr:, hop_length:, win_length:)
54
# Checks that sr and hop_length are positive integers, then delegates to
# Muze::Feature.tempogram with every argument passed through unchanged.
#
# @return the value returned by Muze::Feature.tempogram
def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384, normalize: false)
  { "sr" => sr, "hop_length" => hop_length }.each do |label, value|
    validate_positive_integer!(value, label)
  end

  Muze::Feature.tempogram(y:, onset_envelope:, sr:, hop_length:, win_length:, normalize:)
end
59
+
60
# Builds the tempo (BPM) value associated with each lag bin.
#
# Bin 0 is reported as 0.0; bin `lag` is 60 * sr / (hop_length * lag).
#
# @param sr [Integer] positive sample rate
# @param hop_length [Integer] positive hop length
# @param win_length [Integer] number of lag bins, must be positive
# @return [Numo::SFloat]
# @raise [Muze::ParameterError] when win_length is not positive
def tempo_frequencies(sr: 22_050, hop_length: 512, win_length: 384)
  validate_positive_integer!(sr, "sr")
  validate_positive_integer!(hop_length, "hop_length")
  raise Muze::ParameterError, "win_length must be positive" unless win_length.positive?

  frames_per_minute = 60.0 * sr
  bpm_by_lag = Array.new(win_length) do |lag|
    next 0.0 if lag.zero?

    frames_per_minute / (hop_length * lag)
  end

  Numo::SFloat.cast(bpm_by_lag)
end
36
69
 
37
- def estimate_tempo(envelope, sr:, hop_length:, start_bpm:)
70
+ def estimate_tempo(envelope, sr:, hop_length:, start_bpm:, min_bpm:, max_bpm:, tightness:)
38
71
  return start_bpm if envelope.length < 4
39
72
 
40
- min_bpm = 30.0
41
- max_bpm = 240.0
73
+ raise Muze::ParameterError, "min_bpm must be positive" unless min_bpm.positive?
74
+ raise Muze::ParameterError, "max_bpm must be greater than min_bpm" unless max_bpm > min_bpm
75
+
42
76
  min_lag = [(sr * 60.0 / (hop_length * max_bpm)).round, 1].max
43
77
  max_lag = [(sr * 60.0 / (hop_length * min_bpm)).round, envelope.length - 1].min
44
78
  return start_bpm if min_lag >= max_lag
45
79
 
80
+ prior_lag = sr * 60.0 / (hop_length * start_bpm)
46
81
  best_lag = min_lag
47
82
  best_score = -Float::INFINITY
48
83
 
49
84
  (min_lag..max_lag).each do |lag|
50
85
  score = 0.0
51
86
  (lag...envelope.length).each { |index| score += envelope[index] * envelope[index - lag] }
87
+ score -= normalized_tightness(tightness) * ((lag - prior_lag).abs / prior_lag) * score.abs
52
88
  next unless score > best_score
53
89
 
54
90
  best_score = score
@@ -59,7 +95,7 @@ module Muze
59
95
  end
60
96
  private_class_method :estimate_tempo
61
97
 
62
- def track_beats(envelope, tempo:, sr:, hop_length:, tightness:)
98
+ def track_beats(envelope, tempo:, sr:, hop_length:, tightness:, fill_missing:)
63
99
  interval = [(60.0 * sr / (tempo * hop_length)).round, 1].max
64
100
  peaks = Muze::Onset.onset_detect(onset_envelope: envelope, backtrack: false)
65
101
  return [] if peaks.empty?
@@ -69,7 +105,8 @@ module Muze
69
105
 
70
106
  while target < envelope.length
71
107
  candidates = peaks.select { |peak| (peak - target).abs <= search_radius(interval, tightness) }
72
- beats << select_beat_candidate(candidates, target:, interval:, envelope:, tightness:)
108
+ candidate = select_beat_candidate(candidates, target:, interval:, envelope:, tightness:)
109
+ beats << (candidate || target) if fill_missing || candidate
73
110
  target += interval
74
111
  end
75
112
 
@@ -85,7 +122,7 @@ module Muze
85
122
  private_class_method :search_radius
86
123
 
87
124
  def select_beat_candidate(candidates, target:, interval:, envelope:, tightness:)
88
- return target unless candidates.any?
125
+ return nil unless candidates.any?
89
126
 
90
127
  penalty_weight = 1.0 + (4.0 * normalized_tightness(tightness))
91
128
  candidates.max_by do |candidate|
@@ -96,6 +133,16 @@ module Muze
96
133
  end
97
134
  private_class_method :select_beat_candidate
98
135
 
136
# Scores a beat sequence as the mean envelope value at the beat frames,
# relative to the envelope maximum.
#
# Returns 0.0 when there are no beats or when the envelope maximum is not
# above zero. Frames that index outside the envelope contribute 0.0.
#
# @param envelope [Array<Numeric>]
# @param beats [Array<Integer>]
# @return [Float]
def beat_confidence(envelope, beats)
  return 0.0 if beats.empty?

  ceiling = envelope.max.to_f
  return 0.0 if ceiling <= 0.0

  energy_at_beats = beats.sum { |frame| envelope[frame].to_f }
  energy_at_beats / (beats.length * ceiling)
end
144
+ private_class_method :beat_confidence
145
+
99
146
  def normalized_tightness(tightness)
100
147
  value = tightness.to_f
101
148
  return 0.0 if value <= 0.0
@@ -103,5 +150,40 @@ module Muze
103
150
  [value / 100.0, 4.0].min / 4.0
104
151
  end
105
152
  private_class_method :normalized_tightness
153
+
154
# Validates the keyword arguments accepted by beat_track, in a fixed order,
# raising Muze::ParameterError on the first value that is not acceptable.
#
# bpm is optional and only checked when given; fill_missing and
# return_metadata must be exactly true or false.
def validate_beat_params!(sr:, hop_length:, start_bpm:, tightness:, min_bpm:, max_bpm:, bpm:, fill_missing:, return_metadata:)
  validate_positive_integer!(sr, "sr")
  validate_positive_integer!(hop_length, "hop_length")
  validate_positive_number!(start_bpm, "start_bpm")

  finite_tightness = tightness.respond_to?(:finite?) && tightness.finite?
  raise Muze::ParameterError, "tightness must be finite" unless finite_tightness

  validate_positive_number!(min_bpm, "min_bpm")
  validate_positive_number!(max_bpm, "max_bpm")
  raise Muze::ParameterError, "max_bpm must be greater than min_bpm" unless max_bpm > min_bpm

  validate_positive_number!(bpm, "bpm") if bpm

  booleans = [true, false]
  raise Muze::ParameterError, "fill_missing must be true or false" unless booleans.include?(fill_missing)
  raise Muze::ParameterError, "return_metadata must be true or false" unless booleans.include?(return_metadata)
end
166
+ private_class_method :validate_beat_params!
167
+
168
# Ensures +value+ is an Integer greater than zero.
#
# @param value [Object] candidate value
# @param label [String] parameter name used in the error message
# @return [nil]
# @raise [Muze::ParameterError] when value is not a positive Integer
def validate_positive_integer!(value, label)
  raise Muze::ParameterError, "#{label} must be a positive integer" unless value.is_a?(Integer) && value.positive?
end
173
+ private_class_method :validate_positive_integer!
174
+
175
# Ensures +value+ is a finite number greater than zero.
#
# @param value [Object] candidate value
# @param label [String] parameter name used in the error message
# @return [nil]
# @raise [Muze::ParameterError] when value is non-numeric, non-finite, or not positive
def validate_positive_number!(value, label)
  acceptable = value.respond_to?(:finite?) && value.finite? && value.positive?
  raise Muze::ParameterError, "#{label} must be positive" unless acceptable
end
180
+ private_class_method :validate_positive_number!
181
+
182
# Ensures every element of +values+ is a finite number.
#
# @param values [Enumerable] elements to check
# @param label [String] parameter name used in the error message
# @return [nil]
# @raise [Muze::ParameterError] when any element is non-numeric or not finite
def validate_finite_array!(values, label)
  has_invalid = values.any? { |value| !(value.respond_to?(:finite?) && value.finite?) }
  raise Muze::ParameterError, "#{label} must contain only finite numeric values" if has_invalid
end
187
+ private_class_method :validate_finite_array!
106
188
  end
107
189
  end
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
module Muze
  module Core
    # Helpers for validating, normalizing, and re-assembling audio arrays.
    # Audio is handled as a Numo::SFloat that is either one-dimensional or
    # two-dimensional with channels on the second axis.
    module Audio
      module_function

      # Casts +y+ to Numo::SFloat and checks rank, emptiness, finiteness, and
      # channel count.
      #
      # @param y [Array, Numo::NArray]
      # @param allow_empty [Boolean] accept zero-length audio when true
      # @return [Numo::SFloat] the cast signal
      # @raise [Muze::ParameterError] when the input is rejected; NoMethodError,
      #   TypeError, and ArgumentError raised along the way are re-raised as
      #   Muze::ParameterError
      def validate_audio!(y, allow_empty: false)
        raise Muze::ParameterError, "audio must not be nil" if y.nil?

        samples = Numo::SFloat.cast(y)
        rank = samples.ndim
        raise Muze::ParameterError, "audio must be one- or two-dimensional" unless rank == 1 || rank == 2
        raise Muze::ParameterError, "audio must not be empty" unless allow_empty || !samples.empty?
        raise Muze::ParameterError, "audio must contain only finite numeric values" unless finite_values?(samples)
        raise Muze::ParameterError, "audio channel count must be positive" if rank == 2 && samples.shape[1].zero?

        samples
      rescue NoMethodError, TypeError, ArgumentError => e
        raise Muze::ParameterError, "audio must be Array<Float> or Numo::NArray: #{e.message}"
      end

      # Non-raising variant of validate_audio!.
      #
      # @return [Boolean] true when validate_audio! accepts +y+
      def valid_audio?(y, allow_empty: false)
        !validate_audio!(y, allow_empty:).nil?
      rescue Muze::ParameterError
        false
      end

      # Scales audio so its absolute maximum equals +peak+.
      #
      # With axis: :channels and two-dimensional audio each channel is scaled
      # independently; otherwise one global factor is used. Empty or all-zero
      # audio is returned unscaled.
      #
      # @param y [Array, Numo::NArray]
      # @param peak [Numeric] positive target peak
      # @param axis [nil, Symbol] nil or :channels
      # @return [Numo::SFloat]
      def normalize(y, peak: 1.0, axis: nil)
        raise Muze::ParameterError, "peak must be positive" unless peak.respond_to?(:positive?) && peak.positive?

        per_channel = axis == :channels
        raise Muze::ParameterError, "axis must be nil or :channels" unless axis.nil? || per_channel

        samples = validate_audio!(y, allow_empty: true)
        return samples if samples.empty?
        return normalize_channels(samples, peak:) if per_channel && samples.ndim == 2

        loudest = samples.abs.max.to_f
        return samples if loudest <= 0.0

        gain = peak.to_f / loudest
        (samples * gain).cast_to(Numo::SFloat)
      end

      # Concatenates the given intervals of +y+ in the order supplied.
      #
      # +intervals+ is a list of [start, end] pairs, or a single pair. Positions
      # are interpreted according to +units+ (:samples, :frames, or :time) and
      # clamped to the signal bounds when slicing.
      #
      # @return [Numo::SFloat]
      # @raise [Muze::ParameterError] for malformed pairs or end < start
      def remix(y, intervals, units: :samples, sr: nil, hop_length: 512)
        samples = validate_audio!(y, allow_empty: true)

        pairs = intervals.respond_to?(:to_a) ? intervals.to_a : Array(intervals)
        # A bare [start, end] pair is treated as a one-element interval list.
        pairs = [pairs] if pairs.length == 2 && !pairs.first.respond_to?(:to_a)

        # Validate and convert every interval before any slicing happens.
        bounds = pairs.map do |interval|
          pair = interval.respond_to?(:to_a) ? interval.to_a : nil
          raise Muze::ParameterError, "intervals must contain [start, end] pairs" unless pair && pair.length == 2

          first_sample = convert_position(pair[0], units:, sr:, hop_length:)
          last_sample = convert_position(pair[1], units:, sr:, hop_length:)
          raise Muze::ParameterError, "interval end must be >= start" if last_sample < first_sample

          [first_sample, last_sample]
        end

        pieces = bounds.map { |first_sample, last_sample| slice_samples(samples, first_sample, last_sample) }
        channel_count = samples.ndim == 2 ? samples.shape[1] : nil
        concatenate_chunks(pieces, channel_count:)
      end

      # True when every element of +signal+ is a finite number.
      def finite_values?(signal)
        signal.to_a.flatten.none? { |value| !(value.respond_to?(:finite?) && value.finite?) }
      end
      private_class_method :finite_values?

      # Scales each column of a two-dimensional signal to +peak+ on its own;
      # all-zero columns are copied through unchanged.
      def normalize_channels(signal, peak:)
        frame_total, channel_total = signal.shape
        scaled = Numo::SFloat.zeros(frame_total, channel_total)

        (0...channel_total).each do |channel|
          column = signal[true, channel]
          loudest = column.abs.max.to_f
          scaled[true, channel] =
            if loudest <= 0.0
              column
            else
              column * (peak.to_f / loudest)
            end
        end

        scaled
      end
      private_class_method :normalize_channels

      # Converts a position expressed in +units+ to a sample index.
      def convert_position(position, units:, sr:, hop_length:)
        case units
        when :samples
          position.to_i
        when :frames
          raise Muze::ParameterError, "hop_length must be positive" unless hop_length.positive?

          (position.to_i * hop_length).to_i
        when :time
          raise Muze::ParameterError, "sr must be positive for time units" unless sr&.positive?

          (position.to_f * sr).round
        else
          raise Muze::ParameterError, "units must be :samples, :frames, or :time"
        end
      end
      private_class_method :convert_position

      # Returns signal[start_sample...end_sample] along the first axis with
      # both bounds clamped to the signal; an empty array of matching rank is
      # returned when the clamped range is empty.
      def slice_samples(signal, start_sample, end_sample)
        length = signal.shape[0]
        stereo = signal.ndim == 2
        from = start_sample.clamp(0, length)
        upto = end_sample.clamp(0, length)

        if upto <= from
          return stereo ? Numo::SFloat.zeros(0, signal.shape[1]) : Numo::SFloat[]
        end

        stereo ? signal[from...upto, true] : signal[from...upto]
      end
      private_class_method :slice_samples

      # Joins chunks along the first axis. +channel_count+ is nil for
      # one-dimensional audio and the channel total for two-dimensional audio.
      def concatenate_chunks(chunks, channel_count:)
        unless channel_count
          return Numo::SFloat[] if chunks.empty?

          return Numo::SFloat.cast(chunks.flat_map(&:to_a))
        end

        return Numo::SFloat.zeros(0, channel_count) if chunks.empty?

        total_frames = chunks.sum { |chunk| chunk.shape[0] }
        joined = Numo::SFloat.zeros(total_frames, channel_count)
        cursor = 0
        chunks.reject { |chunk| chunk.shape[0].zero? }.each do |chunk|
          frames = chunk.shape[0]
          joined[cursor...(cursor + frames), true] = chunk
          cursor += frames
        end
        joined
      end
      private_class_method :concatenate_chunks
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module Muze
  module Core
    # Mutex-guarded least-recently-used cache with a fixed capacity, used for
    # generated DSP lookup tables. Recency is tracked through Hash insertion
    # order: a hit re-inserts its entry, and eviction drops the oldest entry.
    class BoundedCache
      # @param max_size [Integer] positive maximum number of entries
      # @raise [Muze::ParameterError] when max_size is not a positive Integer
      def initialize(max_size:)
        valid_size = max_size.is_a?(Integer) && max_size.positive?
        raise Muze::ParameterError, "max_size must be positive" unless valid_size

        @max_size = max_size
        @entries = {}
        @mutex = Mutex.new
      end

      # Returns the cached value for +key+, or stores and returns the result
      # of the block. The block runs while the cache lock is held.
      #
      # @param key [Object]
      # @yieldreturn [Object] value to cache on a miss
      # @return [Object]
      def fetch(key)
        @mutex.synchronize do
          # Hit: delete and re-insert so the entry becomes the newest one.
          next @entries[key] = @entries.delete(key) if @entries.key?(key)

          computed = yield
          # Make room before inserting so the size never exceeds the limit.
          @entries.shift until @entries.size < @max_size
          @entries[key] = computed
        end
      end

      # Removes every entry.
      def clear
        @mutex.synchronize { @entries.clear }
      end

      # @return [Integer] number of cached entries
      def size
        @mutex.synchronize { @entries.size }
      end
    end
  end
end
data/lib/muze/core/dct.rb CHANGED
@@ -4,6 +4,7 @@ module Muze
4
4
  module Core
5
5
  # DCT utilities.
6
6
  module DCT
7
+ BASIS_CACHE = Muze::Core::BoundedCache.new(max_size: 64)
7
8
  module_function
8
9
 
9
10
  # @param x [Numo::NArray]
@@ -22,34 +23,36 @@ module Muze
22
23
 
23
24
  rows, cols = matrix.shape
24
25
  target_length = n || rows
25
- result = Numo::SFloat.zeros(target_length, cols)
26
-
27
- cols.times do |col|
28
- signal = matrix[true, col].to_a
29
- transformed = dct_vector(signal, target_length, norm:)
30
- target_length.times { |idx| result[idx, col] = transformed[idx] }
31
- end
26
+ working = adjust_rows(matrix, target_length)
27
+ result = basis_matrix(rows: target_length, cols: target_length, norm:).dot(working).cast_to(Numo::SFloat)
32
28
 
33
29
  axis == 1 ? result.transpose : result
34
30
  end
35
31
 
36
- def dct_vector(signal, n, norm:)
37
- padded = if signal.length >= n
38
- signal[0, n]
39
- else
40
- signal + Array.new(n - signal.length, 0.0)
41
- end
42
-
43
- Array.new(n) do |k|
44
- sum = 0.0
45
- n.times do |idx|
46
- sum += padded[idx] * Math.cos(Math::PI * (idx + 0.5) * k / n)
47
- end
32
+ def adjust_rows(matrix, target_length)
33
+ rows, cols = matrix.shape
34
+ return matrix[0...target_length, true] if rows >= target_length
48
35
 
49
- normalize_dct(sum, k, n, norm)
36
+ output = Numo::SFloat.zeros(target_length, cols)
37
+ output[0...rows, true] = matrix
38
+ output
39
+ end
40
+ private_class_method :adjust_rows
41
+
42
# Returns the cosine basis used by the DCT as a rows x cols Numo::SFloat,
# with each coefficient passed through normalize_dct. Results are memoized
# in BASIS_CACHE under [rows, cols, norm].
def basis_matrix(rows:, cols:, norm:)
  BASIS_CACHE.fetch([rows, cols, norm]) do
    Numo::SFloat.zeros(rows, cols).tap do |basis|
      rows.times do |k|
        cols.times do |n|
          cosine = Math.cos(Math::PI * (n + 0.5) * k / cols)
          basis[k, n] = normalize_dct(cosine, k, cols, norm)
        end
      end
    end
  end
end
52
- private_class_method :dct_vector
55
+ private_class_method :basis_matrix
53
56
 
54
57
  def normalize_dct(value, index, length, norm)
55
58
  return value * 2.0 unless norm == :ortho
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module Muze
  module Core
    # Fixed-size frame slicing shared by analysis and effects code.
    module Frames
      module_function

      # Splits +signal+ into frames of +frame_length+ samples taken every
      # +hop_length+ samples.
      #
      # A signal no longer than one frame yields a single zero-padded frame.
      # Without +pad_end+ the work is delegated to Muze::Native.frame_slices;
      # with +pad_end+ the trailing partial frame is kept and zero-padded.
      #
      # @param signal [Array, Numo::NArray]
      # @param frame_length [Integer]
      # @param hop_length [Integer]
      # @param pad_end [Boolean]
      # @return [Array<Array>] frames
      # @raise [Muze::ParameterError] when frame_length or hop_length is not positive
      def slice(signal, frame_length:, hop_length:, pad_end: false)
        unless frame_length.positive? && hop_length.positive?
          raise Muze::ParameterError, "frame_length and hop_length must be positive"
        end

        samples = signal.is_a?(Numo::NArray) ? signal.to_a : Array(signal)
        sample_count = samples.length
        return [pad_frame(samples, frame_length)] if sample_count <= frame_length
        return Muze::Native.frame_slices(samples, frame_length, hop_length) unless pad_end

        # Number of hops after the first frame needed to reach the signal end.
        extra_hops = ((sample_count - frame_length).to_f / hop_length).ceil
        (0..extra_hops).map do |index|
          pad_frame(samples[index * hop_length, frame_length] || [], frame_length)
        end
      end

      # Right-pads +frame+ with 0.0 up to +frame_length+ elements; frames that
      # are already long enough are returned as-is.
      def pad_frame(frame, frame_length)
        shortfall = frame_length - frame.length
        shortfall.positive? ? frame + Array.new(shortfall, 0.0) : frame
      end
      private_class_method :pad_frame
    end
  end
end