wavify 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.serena/.gitignore +1 -0
  3. data/.serena/memories/project_overview.md +5 -0
  4. data/.serena/memories/style_and_completion.md +5 -0
  5. data/.serena/memories/suggested_commands.md +11 -0
  6. data/.serena/project.yml +126 -0
  7. data/.simplecov +18 -0
  8. data/.yardopts +4 -0
  9. data/CHANGELOG.md +11 -0
  10. data/LICENSE +21 -0
  11. data/README.md +196 -0
  12. data/Rakefile +190 -0
  13. data/benchmarks/README.md +46 -0
  14. data/benchmarks/benchmark_helper.rb +112 -0
  15. data/benchmarks/dsp_effects_benchmark.rb +46 -0
  16. data/benchmarks/flac_benchmark.rb +74 -0
  17. data/benchmarks/streaming_memory_benchmark.rb +94 -0
  18. data/benchmarks/wav_io_benchmark.rb +110 -0
  19. data/examples/audio_processing.rb +73 -0
  20. data/examples/cinematic_transition.rb +118 -0
  21. data/examples/drum_machine.rb +74 -0
  22. data/examples/format_convert.rb +81 -0
  23. data/examples/hybrid_arrangement.rb +165 -0
  24. data/examples/streaming_master_chain.rb +129 -0
  25. data/examples/synth_pad.rb +42 -0
  26. data/lib/wavify/audio.rb +483 -0
  27. data/lib/wavify/codecs/aiff.rb +338 -0
  28. data/lib/wavify/codecs/base.rb +108 -0
  29. data/lib/wavify/codecs/flac.rb +1322 -0
  30. data/lib/wavify/codecs/ogg_vorbis.rb +1447 -0
  31. data/lib/wavify/codecs/raw.rb +193 -0
  32. data/lib/wavify/codecs/registry.rb +87 -0
  33. data/lib/wavify/codecs/wav.rb +459 -0
  34. data/lib/wavify/core/duration.rb +99 -0
  35. data/lib/wavify/core/format.rb +133 -0
  36. data/lib/wavify/core/sample_buffer.rb +216 -0
  37. data/lib/wavify/core/stream.rb +129 -0
  38. data/lib/wavify/dsl.rb +537 -0
  39. data/lib/wavify/dsp/effects/chorus.rb +98 -0
  40. data/lib/wavify/dsp/effects/compressor.rb +85 -0
  41. data/lib/wavify/dsp/effects/delay.rb +69 -0
  42. data/lib/wavify/dsp/effects/distortion.rb +64 -0
  43. data/lib/wavify/dsp/effects/effect_base.rb +68 -0
  44. data/lib/wavify/dsp/effects/reverb.rb +112 -0
  45. data/lib/wavify/dsp/effects.rb +21 -0
  46. data/lib/wavify/dsp/envelope.rb +97 -0
  47. data/lib/wavify/dsp/filter.rb +271 -0
  48. data/lib/wavify/dsp/oscillator.rb +123 -0
  49. data/lib/wavify/errors.rb +34 -0
  50. data/lib/wavify/sequencer/engine.rb +278 -0
  51. data/lib/wavify/sequencer/note_sequence.rb +132 -0
  52. data/lib/wavify/sequencer/pattern.rb +102 -0
  53. data/lib/wavify/sequencer/track.rb +298 -0
  54. data/lib/wavify/sequencer.rb +12 -0
  55. data/lib/wavify/version.rb +6 -0
  56. data/lib/wavify.rb +28 -0
  57. data/tools/fixture_writer.rb +85 -0
  58. metadata +129 -0
@@ -0,0 +1,1447 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stringio"
4
+ require "vorbis"
5
+
6
+ module Wavify
7
+ module Codecs
8
+ # OGG Vorbis codec backed by libogg (ogg-ruby) and libvorbis (vorbis-ruby).
9
+ #
10
+ # Container demuxing uses {Ogg::SyncState} and {Ogg::StreamState}. Audio
11
+ # decode uses libvorbis synthesis functions via {Vorbis::Native}. Audio
12
+ # encode uses {Vorbis::Encoder}.
13
+ #
14
+ # Sequential chained Vorbis logical streams are concatenated (and
15
+ # resampled to the first logical stream sample rate when rates differ).
16
+ # Interleaved multi-stream OGG logical streams are mixed with clipping.
17
+ class OggVorbis < Base
18
+ # Recognized filename extensions.
19
+ EXTENSIONS = %w[.ogg .oga].freeze
20
+
21
+ VORBIS_SIGNATURE = "vorbis" # :nodoc:
22
+ IDENTIFICATION_HEADER_TYPE = 0x01 # :nodoc:
23
+ COMMENT_HEADER_TYPE = 0x03 # :nodoc:
24
+ SETUP_HEADER_TYPE = 0x05 # :nodoc:
25
+ GRANULE_POSITION_UNKNOWN = 0xFFFF_FFFF_FFFF_FFFF # :nodoc:
26
+ VORBIS_ENCODE_DEFAULT_QUALITY = 0.4 # :nodoc:
27
+
28
+ class << self
29
+ # @param io_or_path [String, IO]
30
+ # @return [Boolean]
31
def can_read?(io_or_path)
  # A String is judged purely by its filename extension.
  if io_or_path.is_a?(String)
    extension = File.extname(io_or_path).downcase
    return true if EXTENSIONS.include?(extension)
  end

  # Anything that cannot be read from (including non-matching Strings) is rejected.
  return false unless io_or_path.respond_to?(:read)

  # Sniff the 4-byte OGG capture pattern, then rewind so the caller can reuse the IO.
  capture_pattern = io_or_path.read(4)
  io_or_path.rewind if io_or_path.respond_to?(:rewind)
  capture_pattern == "OggS"
end
39
+
40
+ # Reads OGG Vorbis audio.
41
+ #
42
+ # @note Supports full Vorbis decode via libvorbis. Sequential chained
43
+ # OGG logical streams are concatenated and normalized to the first
44
+ # logical stream format (including resampling). Interleaved
45
+ # multi-stream OGG logical streams are mixed.
46
+ # `decode_mode:` is accepted for API compatibility but has no
47
+ # effect; libvorbis always performs full decode.
48
def read(io_or_path, format: nil, decode_mode: :strict)
  unless %i[strict placeholder].include?(decode_mode)
    raise InvalidParameterError, "decode_mode must be :strict or :placeholder, got #{decode_mode.inspect}"
  end

  # The chained-stream helper returns a truthy decoded result when it handled
  # the input itself; otherwise fall through to the single-stream path.
  chained_result = decode_chained_vorbis_read_if_needed(io_or_path, decode_mode: decode_mode, target_format: format)
  return chained_result if chained_result

  single_stream = run_vorbis_decode_pipeline(build_vorbis_decode_context(io_or_path))

  # Only convert when the caller asked for a specific output format.
  format ? single_stream.convert(format) : single_stream
end
62
+
63
+ # Writes OGG Vorbis audio.
64
def write(io_or_path, sample_buffer, format:)
  unless sample_buffer.is_a?(Core::SampleBuffer)
    raise InvalidParameterError, "sample_buffer must be Core::SampleBuffer"
  end

  # A whole-buffer write is a streaming write that emits exactly one chunk.
  stream_write(io_or_path, format: format) { |emit| emit.call(sample_buffer) }
end
71
+
72
+ # Streams OGG Vorbis audio decoding.
73
+ #
74
+ # @note Supports full Vorbis decode via libvorbis. Sequential chained
75
+ # OGG logical streams are concatenated and normalized to the first
76
+ # logical stream format during streaming (including resampling).
77
+ # Interleaved multi-stream OGG logical streams are mixed.
78
+ # `decode_mode:` is accepted for API compatibility but has no effect.
79
def stream_read(io_or_path, chunk_size: 4096, decode_mode: :strict, &block)
  unless block_given?
    return enum_for(__method__, io_or_path, chunk_size: chunk_size, decode_mode: decode_mode)
  end

  chunk_size_ok = chunk_size.is_a?(Integer) && chunk_size.positive?
  raise InvalidParameterError, "chunk_size must be a positive Integer" unless chunk_size_ok

  unless %i[strict placeholder].include?(decode_mode)
    raise InvalidParameterError, "decode_mode must be :strict or :placeholder, got #{decode_mode.inspect}"
  end

  # A truthy return means the chained-stream helper already yielded every chunk.
  handled = stream_chained_vorbis_if_needed(io_or_path, chunk_size: chunk_size, decode_mode: decode_mode, &block)
  return nil if handled

  context = build_vorbis_decode_context(io_or_path)
  run_vorbis_decode_pipeline(context, streaming: true, chunk_size: chunk_size, &block)
end
90
+
91
+ # Streams OGG Vorbis audio encoding via {Vorbis::Encoder}.
92
+ #
93
+ # @note Encodes using libvorbis at the default VBR quality level.
94
+ # Accepts any channel count and sample rate supported by libvorbis.
95
def stream_write(io_or_path, format:)
  return enum_for(__method__, io_or_path, format: format) unless block_given?
  raise InvalidParameterError, "format must be Core::Format" unless format.is_a?(Core::Format)
  raise InvalidParameterError, "Vorbis encode requires positive channel count" unless format.channels.to_i.positive?
  raise InvalidParameterError, "Vorbis encode requires positive sample_rate" unless format.sample_rate.to_i.positive?

  # The encoder is fed 32-bit float samples, so every chunk is normalized to this format.
  float_format = Core::Format.new(
    channels: format.channels,
    sample_rate: format.sample_rate,
    bit_depth: 32,
    sample_format: :float
  )

  # Start from an empty output when the IO supports it.
  io, close_io = open_output(io_or_path)
  io.rewind if io.respond_to?(:rewind)
  io.truncate(0) if io.respond_to?(:truncate)

  encoder = Vorbis::Encoder.new(
    channels: float_format.channels,
    rate: float_format.sample_rate,
    quality: VORBIS_ENCODE_DEFAULT_QUALITY
  )

  # Every OGG page produced by the encoder is appended to the output as-is.
  emit_page = proc { |page_bytes| io.write(page_bytes) }
  encoder.write_headers(&emit_page)

  writer = lambda do |chunk|
    raise InvalidParameterError, "stream chunk must be Core::SampleBuffer" unless chunk.is_a?(Core::SampleBuffer)

    buffer =
      if chunk.format == float_format
        chunk
      else
        chunk.convert(float_format)
      end
    next nil if buffer.sample_frame_count.zero?

    # De-interleave: one Array of floats per channel.
    planar = Array.new(float_format.channels) { [] }
    buffer.samples.each_slice(float_format.channels) do |frame|
      frame.each_with_index { |sample, channel| planar[channel] << sample.to_f }
    end
    encoder.encode(planar, &emit_page)
  end

  yield writer

  encoder.finish(&emit_page)
  encoder.close
  io.flush if io.respond_to?(:flush)
  io.rewind if io.respond_to?(:rewind)
  io_or_path
rescue StandardError
  # Best-effort encoder cleanup; the original error is what the caller sees.
  begin
    encoder&.close
  rescue StandardError
    nil
  end
  raise
ensure
  io.close if close_io && io
end
150
+
151
+ # Parses OGG/Vorbis headers and returns metadata without audio decode.
152
+ #
153
+ # @param io_or_path [String, IO]
154
+ # @return [Hash]
155
def metadata(io_or_path)
  io, close_io = open_input(io_or_path)
  ensure_seekable!(io)
  logical_streams, physical_info = read_ogg_logical_stream_chains(io)

  # Single logical stream: its metadata is the result.
  if logical_streams.length <= 1
    only_stream = logical_streams.fetch(0)
    return parse_single_logical_stream_metadata(StringIO.new(only_stream.fetch(:bytes)))
  end

  # Several logical streams: parse each one, then merge according to how
  # their pages are laid out in the physical stream.
  per_stream = logical_streams.map do |logical_stream|
    parse_single_logical_stream_metadata(StringIO.new(logical_stream.fetch(:bytes)))
  end

  if physical_info[:interleaved_multistream]
    merge_interleaved_vorbis_metadata(per_stream, logical_streams, physical_info)
  else
    merge_chained_vorbis_metadata(per_stream, logical_streams)
  end
ensure
  io.close if close_io && io
end
175
+
176
+ private
177
+
178
+ # ---------------------------------------------------------------------------
179
+ # OGG container reading (using ogg-ruby)
180
+ # ---------------------------------------------------------------------------
181
+
182
# Opens +io_or_path+, splits it into logical streams and closes the IO again
# when this method opened it.
#
# @param with_info [Boolean] when true, also return the physical-stream info Hash
# @return [Array<Hash>, Array(Array<Hash>, Hash)]
def read_ogg_logical_stream_chains_from_input(io_or_path, with_info: false)
  io, close_io = open_input(io_or_path)
  ensure_seekable!(io)

  logical_streams, physical_info = read_ogg_logical_stream_chains(io)
  return logical_streams unless with_info

  [logical_streams, physical_info]
ensure
  io.close if close_io && io
end
191
+
192
# Splits a physical OGG bitstream into its logical streams (one per serial
# number), validating BOS/EOS page ordering along the way.
#
# @param io [IO] rewindable input
# @return [Array(Array<Hash>, Hash)] frozen per-stream Hashes ordered by first
#   page position, plus a Hash of physical-stream page statistics
# @raise [InvalidFormatError] on BOS/EOS violations, empty input, or errors
#   reported by the Ogg library
def read_ogg_logical_stream_chains(io)
  io.rewind
  sync = Ogg::SyncState.new
  by_serial = {}
  totals = { pages: 0, bos: 0, eos: 0, continued: 0 }
  page_index = 0

  loop do
    data = io.read(4096)
    sync.write(data) if data

    while (page = sync.pageout)
      serial = page.serialno
      is_bos = page.bos?
      is_eos = page.eos?
      is_continued = page.continued?

      raise InvalidFormatError, "first OGG page must have BOS flag" if page_index.zero? && !is_bos

      entry = by_serial[serial]
      if entry.nil?
        raise InvalidFormatError, "first page of OGG logical stream must have BOS flag" unless is_bos

        entry = by_serial[serial] = {
          serial_number: serial,
          bytes: +"",
          page_count: 0,
          bos_page_count: 0,
          eos_page_count: 0,
          continued_page_count: 0,
          eos_seen: false,
          first_physical_page_index: page_index,
          last_physical_page_index: page_index,
          physical_page_indices: []
        }
      elsif entry[:eos_seen]
        raise InvalidFormatError, "unexpected OGG page after EOS in logical stream"
      end

      raise InvalidFormatError, "unexpected BOS page in OGG logical stream" if entry[:page_count].positive? && is_bos

      # Per-stream bookkeeping.
      entry[:bytes] << page.to_s
      entry[:page_count] += 1
      entry[:bos_page_count] += 1 if is_bos
      entry[:eos_page_count] += 1 if is_eos
      entry[:continued_page_count] += 1 if is_continued
      entry[:eos_seen] = true if is_eos
      entry[:last_physical_page_index] = page_index
      entry[:physical_page_indices] << page_index

      # Physical-stream bookkeeping.
      totals[:pages] += 1
      totals[:bos] += 1 if is_bos
      totals[:eos] += 1 if is_eos
      totals[:continued] += 1 if is_continued
      page_index += 1
    end

    # A nil read means EOF; pages buffered in the sync state were drained above.
    break if data.nil?
  end

  ordered = by_serial.values.sort_by { |entry| entry[:first_physical_page_index] }
  raise InvalidFormatError, "empty OGG bitstream" if ordered.empty?

  # Two logical streams are interleaved when their page-index ranges intersect.
  serial_pairs = []
  interleaved_serials = {}
  ordered.each_with_index do |earlier, position|
    ordered.drop(position + 1).each do |later|
      disjoint = earlier[:first_physical_page_index] > later[:last_physical_page_index] ||
                 later[:first_physical_page_index] > earlier[:last_physical_page_index]
      next if disjoint

      serial_pairs << [earlier[:serial_number], later[:serial_number]]
      interleaved_serials[earlier[:serial_number]] = true
      interleaved_serials[later[:serial_number]] = true
    end
  end

  frozen_streams = ordered.map do |entry|
    entry.merge(
      bytes: entry[:bytes].dup.freeze,
      interleaved_pages: interleaved_serials.fetch(entry[:serial_number], false)
    ).freeze
  end

  physical_info = {
    page_count: totals[:pages],
    bos_page_count: totals[:bos],
    eos_page_count: totals[:eos],
    continued_page_count: totals[:continued],
    logical_stream_count: ordered.length,
    interleaved_multistream: !serial_pairs.empty?,
    overlapping_logical_stream_serial_pairs: serial_pairs
  }

  [frozen_streams, physical_info]
rescue Ogg::CorruptDataError, Ogg::SyncCorruptDataError => e
  raise InvalidFormatError, "OGG data corrupt or invalid checksum: #{e.message}"
rescue Ogg::StreamCorruptDataError => e
  raise InvalidFormatError, "OGG stream sequence error: #{e.message}"
ensure
  sync&.clear
end
303
+
304
+ # Reads OGG packets from a single logical stream IO using ogg-ruby.
305
+ #
306
+ # Returns [packets, ogg_info] where packets is an Array of Hashes with
307
+ # :data, :kind, :granule_position keys, and ogg_info is a Hash with
308
+ # page-level statistics.
309
def read_ogg_packets(io)
  io.rewind
  sync = Ogg::SyncState.new
  stream_state = nil
  serial_number = nil
  page_stats = { pages: 0, bos: 0, eos: 0, continued: 0 }
  highest_granule = nil
  packets = []

  loop do
    data = io.read(4096)
    sync.write(data) if data

    while (page = sync.pageout)
      page_serial = page.serialno
      if serial_number && page_serial != serial_number
        raise UnsupportedFormatError,
              "multi-stream OGG containers must be split before packet reading"
      end
      raise InvalidFormatError, "first OGG page must have BOS flag" if page_stats[:pages].zero? && !page.bos?

      # The first page fixes the serial number and creates the stream state.
      serial_number ||= page_serial
      stream_state ||= Ogg::StreamState.new(page_serial)
      stream_state.pagein(page)

      page_stats[:bos] += 1 if page.bos?
      page_stats[:eos] += 1 if page.eos?
      page_stats[:continued] += 1 if page.continued?
      page_stats[:pages] += 1

      while (packet = stream_state.packetout)
        raw_granule = packet.granulepos
        # -1 marks a packet without a known granule position.
        granule_known = raw_granule != -1

        packets << {
          data: packet.data,
          bos: packet.bos?,
          eos: packet.eos?,
          packetno: packet.packetno,
          kind: classify_vorbis_packet(packet.data),
          granule_position: granule_known ? raw_granule : nil
        }

        next unless granule_known

        floor = highest_granule || 0
        highest_granule = raw_granule > floor ? raw_granule : floor
      end
    end

    # A nil read means EOF.
    break if data.nil?
  end

  ogg_info = {
    serial_number: serial_number,
    page_count: page_stats[:pages],
    max_granule_position: highest_granule,
    bos_page_count: page_stats[:bos],
    eos_page_count: page_stats[:eos],
    continued_page_count: page_stats[:continued]
  }

  [packets, ogg_info]
rescue Ogg::CorruptDataError, Ogg::SyncCorruptDataError => e
  raise InvalidFormatError, "OGG data corrupt or invalid checksum: #{e.message}"
rescue Ogg::StreamCorruptDataError => e
  raise InvalidFormatError, "OGG stream sequence error: #{e.message}"
ensure
  stream_state&.clear
  sync&.clear
end
380
+
381
+ # ---------------------------------------------------------------------------
382
+ # Vorbis decode (using Vorbis::Native synthesis functions)
383
+ # ---------------------------------------------------------------------------
384
+
385
# Reads every packet of a single logical stream, feeds the three Vorbis
# header packets to libvorbis and returns the state needed for decoding.
#
# On success the native info/comment structs stay initialized and are owned
# by the returned context; run_vorbis_decode_pipeline clears them in its
# ensure block. On failure they are cleared here, because nothing else would
# ever see the pointers.
#
# @param io_or_path [String, IO]
# @return [Hash] :format, :channels, :sample_rate, :audio_packets,
#   :sample_frame_count (highest granule position seen, or nil), :info_ptr,
#   :comment_ptr
# @raise [InvalidFormatError] when a header packet is missing or rejected, or
#   when the stream carries no audio packets
def build_vorbis_decode_context(io_or_path)
  io, close_io = open_input(io_or_path)
  ensure_seekable!(io)

  packet_entries, ogg_info = read_ogg_packets(io)
  raise InvalidFormatError, "missing Vorbis identification header" if packet_entries[0].nil?
  raise InvalidFormatError, "missing Vorbis comment header" if packet_entries[1].nil?
  raise InvalidFormatError, "missing Vorbis setup header" if packet_entries[2].nil?

  info_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisInfo.size)
  comment_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisComment.size)
  Vorbis::Native.vorbis_info_init(info_ptr)
  Vorbis::Native.vorbis_comment_init(comment_ptr)
  # From here on the native structs must be cleared if we fail to hand them off.
  native_state_initialized = true

  packet_entries.first(3).each_with_index do |entry, idx|
    pkt = Ogg::Packet.new(
      data: entry.fetch(:data),
      bos: entry.fetch(:bos, idx.zero?),
      eos: entry.fetch(:eos, false),
      packetno: entry.fetch(:packetno, idx)
    )
    result = Vorbis::Native.vorbis_synthesis_headerin(info_ptr, comment_ptr, pkt.native)
    raise InvalidFormatError, "Vorbis header parse failed (code #{result})" unless result.zero?
  end

  vinfo = Vorbis::Native::VorbisInfo.new(info_ptr)
  channels = vinfo[:channels]
  sample_rate = vinfo[:rate]

  # Everything after the three headers that is classified as audio gets decoded.
  audio_packets = packet_entries.drop(3).select { |e| e.fetch(:kind) == :audio }
  raise InvalidFormatError, "OGG Vorbis stream does not contain audio packets" if audio_packets.empty?

  format = Core::Format.new(channels: channels, sample_rate: sample_rate, bit_depth: 32, sample_format: :float)

  {
    format: format,
    channels: channels,
    sample_rate: sample_rate,
    audio_packets: audio_packets,
    sample_frame_count: ogg_info[:max_granule_position],
    info_ptr: info_ptr,
    comment_ptr: comment_ptr
  }
rescue StandardError
  # The context never reached the caller, so release libvorbis allocations
  # here instead of leaking them.
  if native_state_initialized
    Vorbis::Native.vorbis_comment_clear(comment_ptr)
    Vorbis::Native.vorbis_info_clear(info_ptr)
  end
  raise
ensure
  io.close if close_io && io
end
431
+
432
# Decodes every audio packet of a prepared decode context into interleaved
# float samples and always clears the native Vorbis state afterwards.
#
# @param decode_context [Hash] as built by build_vorbis_decode_context
# @param streaming [Boolean] when true (and a block is given) yield chunk_size-frame slices
# @param chunk_size [Integer, nil]
# @return [Core::SampleBuffer, nil] the decoded buffer, or nil when a block consumed it
# @raise [InvalidFormatError] when libvorbis synthesis or block init fails
def run_vorbis_decode_pipeline(decode_context, streaming: false, chunk_size: nil, &block)
  info_ptr = decode_context.fetch(:info_ptr)
  comment_ptr = decode_context.fetch(:comment_ptr)
  packets = decode_context.fetch(:audio_packets)
  channel_count = decode_context.fetch(:channels)
  final_granule = decode_context.fetch(:sample_frame_count)
  output_format = decode_context.fetch(:format)

  dsp_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisDspState.size)
  block_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisBlock.size)
  pcm_pp = FFI::MemoryPointer.new(:pointer)
  dsp_ready = false
  block_ready = false

  status = Vorbis::Native.vorbis_synthesis_init(dsp_ptr, info_ptr)
  raise InvalidFormatError, "Vorbis synthesis init failed (#{status})" unless status.zero?

  dsp_ready = true

  status = Vorbis::Native.vorbis_block_init(dsp_ptr, block_ptr)
  raise InvalidFormatError, "Vorbis block init failed (#{status})" unless status.zero?

  block_ready = true

  interleaved = []
  pointer_width = FFI::Pointer.size

  packets.each do |entry|
    granule = entry[:granule_position]
    native_packet = Ogg::Packet.new(
      data: entry.fetch(:data),
      bos: entry.fetch(:bos, false),
      eos: entry.fetch(:eos, false),
      granulepos: granule.nil? ? -1 : granule,
      packetno: entry.fetch(:packetno, 0)
    )

    # Packets libvorbis rejects are skipped rather than aborting the decode.
    synthesis_status = Vorbis::Native.vorbis_synthesis(block_ptr, native_packet.native)
    next unless synthesis_status.zero?

    Vorbis::Native.vorbis_synthesis_blockin(dsp_ptr, block_ptr)

    # Drain all PCM currently available, interleaving channel by channel.
    while (frames = Vorbis::Native.vorbis_synthesis_pcmout(dsp_ptr, pcm_pp)) > 0
      channel_table = pcm_pp.read_pointer
      frames.times do |frame|
        channel_count.times do |channel|
          channel_ptr = channel_table.get_pointer(channel * pointer_width)
          interleaved << channel_ptr.get_float(frame * 4)
        end
      end
      Vorbis::Native.vorbis_synthesis_read(dsp_ptr, frames)
    end
  end

  # Trim samples beyond the highest granule position recorded for the stream.
  if final_granule&.positive?
    sample_limit = final_granule * channel_count
    interleaved = interleaved.first(sample_limit) if interleaved.length > sample_limit
  end

  decoded = Core::SampleBuffer.new(interleaved, output_format)
  return decoded unless block

  if streaming
    each_sample_buffer_frame_slice(decoded, chunk_size, &block)
  else
    yield decoded
  end
  nil
ensure
  Vorbis::Native.vorbis_block_clear(block_ptr) if block_ready
  Vorbis::Native.vorbis_dsp_clear(dsp_ptr) if dsp_ready
  Vorbis::Native.vorbis_comment_clear(comment_ptr) if comment_ptr
  Vorbis::Native.vorbis_info_clear(info_ptr) if info_ptr
end
505
+
506
+ # ---------------------------------------------------------------------------
507
+ # Metadata (using vorbis-ruby header parsing + ogg-ruby packet reading)
508
+ # ---------------------------------------------------------------------------
509
+
510
# Parses the three Vorbis header packets of one logical stream and returns a
# metadata Hash without decoding any audio.
#
# Header parsing is tolerant: the first header rejected by libvorbis stops
# parsing, and :vorbis_setup_parsed reports whether all three were accepted.
# The vendor string and user comments are returned as UTF-8 Strings (invalid
# byte sequences are scrubbed), as the Vorbis comment format defines them;
# FFI hands them over as binary Strings.
#
# @param io [IO] rewindable IO holding exactly one logical OGG stream
# @return [Hash]
# @raise [InvalidFormatError] when fewer than three packets are present
def parse_single_logical_stream_metadata(io)
  packet_entries, ogg_info = read_ogg_packets(io)
  raise InvalidFormatError, "missing Vorbis identification header" if packet_entries[0].nil?
  raise InvalidFormatError, "missing Vorbis comment header" if packet_entries[1].nil?
  raise InvalidFormatError, "missing Vorbis setup header" if packet_entries[2].nil?

  info_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisInfo.size)
  comment_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisComment.size)
  Vorbis::Native.vorbis_info_init(info_ptr)
  Vorbis::Native.vorbis_comment_init(comment_ptr)

  # Re-tags a binary String from FFI as UTF-8 without raising on bad bytes.
  as_utf8 = ->(bytes) { bytes.dup.force_encoding(Encoding::UTF_8).scrub }

  setup_parsed = false
  identification = {}
  packet_entries.first(3).each_with_index do |entry, index|
    pkt = Ogg::Packet.new(
      data: entry.fetch(:data),
      bos: entry.fetch(:bos, index.zero?),
      eos: entry.fetch(:eos, false),
      packetno: entry.fetch(:packetno, index)
    )
    result = Vorbis::Native.vorbis_synthesis_headerin(info_ptr, comment_ptr, pkt.native)
    if result.zero? && index.zero?
      # Save info from identification header before setup header possibly clears VorbisInfo
      snapshot = Vorbis::Native::VorbisInfo.new(info_ptr)
      identification = {
        channels: snapshot[:channels],
        rate: snapshot[:rate],
        bitrate_nominal: snapshot[:bitrate_nominal],
        bitrate_lower: snapshot[:bitrate_lower],
        bitrate_upper: snapshot[:bitrate_upper]
      }
    end
    break unless result.zero?

    setup_parsed = (index == 2)
  end

  # Prefer the values captured right after the identification header; fall
  # back to the live struct only when that header was not accepted.
  vinfo = Vorbis::Native::VorbisInfo.new(info_ptr)
  channels = identification.fetch(:channels) { vinfo[:channels] }
  sample_rate = identification.fetch(:rate) { vinfo[:rate] }
  nominal_bitrate_raw = identification.fetch(:bitrate_nominal) { vinfo[:bitrate_nominal] }
  minimum_bitrate_raw = identification.fetch(:bitrate_lower) { vinfo[:bitrate_lower] }
  maximum_bitrate_raw = identification.fetch(:bitrate_upper) { vinfo[:bitrate_upper] }

  blocksize_small = nil
  blocksize_large = nil
  if setup_parsed
    bs = Vorbis::Native.vorbis_info_blocksize(info_ptr, 0)
    bl = Vorbis::Native.vorbis_info_blocksize(info_ptr, 1)
    blocksize_small = bs.positive? ? bs : nil
    blocksize_large = bl.positive? ? bl : nil
  end

  vc = Vorbis::Native::VorbisComment.new(comment_ptr)
  vendor = vc[:vendor].null? ? nil : as_utf8.call(vc[:vendor].read_string)
  comments_hash = {}
  n_comments = vc[:comments]
  if n_comments.positive? && !vc[:user_comments].null?
    user_comments_ptr = vc[:user_comments]
    comment_lengths_ptr = vc[:comment_lengths]
    n_comments.times do |i|
      str_ptr = user_comments_ptr.get_pointer(i * FFI::Pointer.size)
      next if str_ptr.null?

      len = comment_lengths_ptr.get_int32(i * 4)
      next unless len.positive?

      # Split on the raw bytes first; entries without "=" are ignored.
      key, value = str_ptr.read_bytes(len).split("=", 2)
      next unless key && value

      # Later duplicates of a (case-insensitive) field name overwrite earlier ones.
      comments_hash[as_utf8.call(key.downcase)] = as_utf8.call(value)
    end
  end

  audio_packets, non_audio_packets = packet_entries.drop(3).partition { |e| e.fetch(:kind) == :audio }
  known_granule_positions = audio_packets.filter_map { |e| e[:granule_position] }

  format = Core::Format.new(channels: channels, sample_rate: sample_rate, bit_depth: 32, sample_format: :float)
  sample_frame_count = ogg_info[:max_granule_position]
  duration = sample_frame_count ? Core::Duration.from_samples(sample_frame_count, format.sample_rate) : nil

  {
    format: format,
    sample_frame_count: sample_frame_count,
    duration: duration,
    vendor: vendor,
    comments: comments_hash,
    nominal_bitrate: nominal_bitrate_raw.positive? ? nominal_bitrate_raw : nil,
    minimum_bitrate: minimum_bitrate_raw.positive? ? minimum_bitrate_raw : nil,
    maximum_bitrate: maximum_bitrate_raw.positive? ? maximum_bitrate_raw : nil,
    blocksize_small: blocksize_small,
    blocksize_large: blocksize_large,
    ogg_serial_number: ogg_info[:serial_number],
    ogg_page_count: ogg_info[:page_count],
    ogg_packet_count: packet_entries.length,
    ogg_bos_page_count: ogg_info[:bos_page_count],
    ogg_eos_page_count: ogg_info[:eos_page_count],
    ogg_continued_page_count: ogg_info[:continued_page_count],
    vorbis_audio_packet_count: audio_packets.length,
    vorbis_non_audio_packet_count: non_audio_packets.length,
    vorbis_audio_packets_with_granule_count: known_granule_positions.length,
    first_audio_packet_granule_position: known_granule_positions.first,
    last_audio_packet_granule_position: known_granule_positions.last,
    vorbis_setup_parsed: setup_parsed,
    # The keys below are always emitted with these fixed placeholder values.
    vorbis_codebook_count: nil,
    vorbis_codebook_dimensions: nil,
    vorbis_codebook_entries: nil,
    vorbis_codebook_lookup_types: nil,
    vorbis_codebook_used_entry_counts: nil,
    vorbis_codebook_sparse_count: nil,
    vorbis_codebook_huffman_complete_count: nil,
    vorbis_codebook_huffman_incomplete_count: nil,
    vorbis_codebook_huffman_max_codeword_length: nil,
    vorbis_floor_count: nil,
    vorbis_residue_count: nil,
    vorbis_floor_types: nil,
    vorbis_residue_types: nil,
    vorbis_mapping_count: nil,
    vorbis_mode_count: nil,
    vorbis_mode_bits: nil,
    vorbis_mode_blockflags: nil,
    vorbis_mode_mappings: nil,
    vorbis_mapping_submap_counts: nil,
    vorbis_mapping_coupling_step_counts: nil,
    vorbis_mapping_coupling_pairs: nil,
    vorbis_mapping_channel_muxes: nil,
    vorbis_mapping_submap_floors: nil,
    vorbis_mapping_submap_residues: nil,
    vorbis_mode_blocksizes: nil,
    vorbis_audio_packet_header_parsed_count: 0,
    vorbis_audio_packet_mode_histogram: {},
    vorbis_audio_packet_blocksize_histogram: {},
    vorbis_window_transition_histogram: {},
    vorbis_decode_plan_built: false,
    vorbis_decode_plan_packet_count: nil,
    vorbis_decode_plan_nominal_overlap_frame_total: nil,
    vorbis_decode_plan_known_granule_delta_count: nil,
    vorbis_decode_plan_nominal_minus_final_granule: nil,
    vorbis_output_assembly_preflight_ok: nil,
    vorbis_output_assembly_preflight_error: nil,
    vorbis_output_assembly_emitted_frame_count: nil,
    vorbis_output_assembly_trim_frames: nil,
    vorbis_output_assembly_window_curve_preflight_count: nil,
    vorbis_long_window_packet_count: nil,
    vorbis_short_window_packet_count: nil,
    setup_header_size: packet_entries[2]&.fetch(:data)&.bytesize
  }
ensure
  Vorbis::Native.vorbis_comment_clear(comment_ptr) if comment_ptr
  Vorbis::Native.vorbis_info_clear(info_ptr) if info_ptr
end
665
+
666
+ # ---------------------------------------------------------------------------
667
+ # Chained / interleaved stream merging (metadata)
668
+ # ---------------------------------------------------------------------------
669
+
670
# Combines per-stream metadata of sequentially chained logical streams into
# one Hash based on the first stream: frame counts (resampled to the first
# stream's sample rate) and page/packet counters are summed, and per-stream
# details are exposed under ogg_logical_stream_* keys.
#
# @param chain_metadatas [Array<Hash>] one metadata Hash per logical stream
# @param chained_streams [Array<Hash>] matching logical stream descriptors
# @return [Hash]
# @raise [InvalidFormatError] when no metadata is given or the counts differ
def merge_chained_vorbis_metadata(chain_metadatas, chained_streams)
  per_stream = Array(chain_metadatas)
  streams = Array(chained_streams)
  raise InvalidFormatError, "OGG Vorbis chained metadata requires at least one logical stream" if per_stream.empty?
  raise InvalidFormatError, "OGG Vorbis chained metadata stream count mismatch" unless per_stream.length == streams.length

  merged = per_stream.first.dup
  base_format = merged.fetch(:format)
  formats = per_stream.map { |entry| entry.fetch(:format) }
  any_format_differs = formats.any? { |candidate| candidate != base_format }

  # Express every stream's length in frames at the first stream's sample rate.
  output_frame_counts = per_stream.map do |entry|
    resampled_vorbis_sample_frame_count(
      entry[:sample_frame_count].to_i,
      source_sample_rate: entry.fetch(:format).sample_rate,
      target_sample_rate: base_format.sample_rate
    )
  end
  total_frames = output_frame_counts.sum
  total_duration = Core::Duration.from_samples(total_frames, base_format.sample_rate)

  # Counters are summed only when every stream reports a value for them.
  %i[
    ogg_page_count
    ogg_packet_count
    ogg_bos_page_count
    ogg_eos_page_count
    ogg_continued_page_count
    vorbis_audio_packet_count
    vorbis_non_audio_packet_count
    vorbis_audio_packets_with_granule_count
    vorbis_audio_packet_header_parsed_count
  ].each do |counter|
    readings = per_stream.map { |entry| entry[counter] }
    merged[counter] = readings.sum unless readings.any?(&:nil?)
  end

  merged.update(
    format: base_format,
    sample_frame_count: total_frames,
    duration: total_duration,
    ogg_serial_number: streams.first.fetch(:serial_number),
    ogg_serial_numbers: streams.map { |stream| stream.fetch(:serial_number) },
    ogg_logical_stream_count: streams.length,
    ogg_logical_stream_formats: formats,
    ogg_logical_stream_sample_frame_counts: per_stream.map { |entry| entry[:sample_frame_count] },
    ogg_logical_stream_output_frame_counts: output_frame_counts,
    ogg_logical_stream_durations: per_stream.map { |entry| entry[:duration] },
    vorbis_chained: true,
    vorbis_chained_mixed_format: any_format_differs,
    vorbis_chained_resampled_sample_rate: formats.any? { |candidate| candidate.sample_rate != base_format.sample_rate }
  )

  merged
end
727
+
728
# Builds a single metadata Hash describing an interleaved multi-stream
# OGG Vorbis file.
#
# A copy of the first logical stream's metadata is the base. Page/packet
# counters are summed across streams (a counter is left as-is when any
# stream does not report it). The overall length is the longest per-stream
# length after scaling each stream's frame count to the first stream's
# sample rate.
#
# @param chain_metadatas [Array<Hash>] per-stream metadata; each entry needs :format
# @param chained_streams [Array<Hash>] logical stream descriptors; each needs :serial_number
# @param physical_ogg_info [Hash] read for :interleaved_multistream and
#   :overlapping_logical_stream_serial_pairs
# @return [Hash] the merged metadata
# @raise [InvalidFormatError] when no metadata is given or the two lists differ in length
def merge_interleaved_vorbis_metadata(chain_metadatas, chained_streams, physical_ogg_info)
  stream_infos = Array(chain_metadatas)
  logical_streams = Array(chained_streams)
  raise InvalidFormatError, "OGG Vorbis interleaved metadata requires at least one logical stream" if stream_infos.empty?
  unless stream_infos.length == logical_streams.length
    raise InvalidFormatError, "OGG Vorbis interleaved metadata stream count mismatch"
  end

  merged = stream_infos.first.dup
  base_format = merged.fetch(:format)
  per_stream_formats = stream_infos.map { |info| info.fetch(:format) }
  output_frame_counts = stream_infos.map do |info|
    resampled_vorbis_sample_frame_count(
      info[:sample_frame_count].to_i,
      source_sample_rate: info.fetch(:format).sample_rate,
      target_sample_rate: base_format.sample_rate
    )
  end
  # Interleaved streams overlap in time, so the longest one sets the length.
  longest_frame_count = output_frame_counts.max || 0
  total_duration = Core::Duration.from_samples(longest_frame_count, base_format.sample_rate)

  summed_counters = %i[
    ogg_page_count
    ogg_packet_count
    ogg_bos_page_count
    ogg_eos_page_count
    ogg_continued_page_count
    vorbis_audio_packet_count
    vorbis_non_audio_packet_count
    vorbis_audio_packets_with_granule_count
    vorbis_audio_packet_header_parsed_count
  ]
  summed_counters.each do |counter|
    per_stream_values = stream_infos.map { |info| info[counter] }
    merged[counter] = per_stream_values.sum if per_stream_values.none?(&:nil?)
  end

  merged.update(
    format: base_format,
    sample_frame_count: longest_frame_count,
    duration: total_duration,
    ogg_serial_number: logical_streams.first.fetch(:serial_number),
    ogg_serial_numbers: logical_streams.map { |stream| stream.fetch(:serial_number) },
    ogg_logical_stream_count: logical_streams.length,
    ogg_logical_stream_formats: per_stream_formats,
    ogg_logical_stream_sample_frame_counts: stream_infos.map { |info| info[:sample_frame_count] },
    ogg_logical_stream_output_frame_counts: output_frame_counts,
    ogg_logical_stream_durations: stream_infos.map { |info| info[:duration] },
    vorbis_chained: false,
    vorbis_interleaved_multistream: true,
    vorbis_interleaved_multistream_mixed: true,
    vorbis_interleaved_multistream_resampled_sample_rate:
      per_stream_formats.any? { |format| format.sample_rate != base_format.sample_rate },
    vorbis_chained_mixed_format: per_stream_formats.any? { |format| format != base_format },
    ogg_interleaved_multistream: physical_ogg_info[:interleaved_multistream],
    ogg_overlapping_logical_stream_serial_pairs: physical_ogg_info[:overlapping_logical_stream_serial_pairs]
  )

  merged
end
787
+
788
+ # ---------------------------------------------------------------------------
789
+ # Chained / interleaved stream decoding (high-level helpers)
790
+ # ---------------------------------------------------------------------------
791
+
792
# Decodes a physical OGG file that holds more than one logical Vorbis stream.
#
# Interleaved streams are each decoded without a requested format and then
# mixed; otherwise each stream is decoded with +target_format+ and the
# results are concatenated in order.
#
# @param io_or_path [IO, String] input handed to the logical-stream reader
# @param decode_mode [Object] forwarded unchanged to +read+
# @param target_format [Core::Format, nil] requested output format
# @return [Core::SampleBuffer, nil] nil when the input has at most one logical stream
def decode_chained_vorbis_read_if_needed(io_or_path, decode_mode:, target_format: nil)
  logical_streams, ogg_info = read_ogg_logical_stream_chains_from_input(io_or_path, with_info: true)
  return nil if logical_streams.length <= 1

  if ogg_info[:interleaved_multistream]
    natively_decoded = logical_streams.map do |logical_stream|
      read(StringIO.new(logical_stream.fetch(:bytes)), decode_mode: decode_mode)
    end
    mix_vorbis_sample_buffers(natively_decoded, target_format: target_format)
  else
    decoded_in_order = logical_streams.map do |logical_stream|
      read(StringIO.new(logical_stream.fetch(:bytes)), format: target_format, decode_mode: decode_mode)
    end
    concatenate_vorbis_sample_buffers(decoded_in_order, target_format: target_format)
  end
end
810
+
811
# Streams a physical OGG file that holds more than one logical Vorbis stream.
#
# Three strategies are used:
# * interleaved streams are delegated to the lockstep mixing streamer;
# * chained streams whose sample rates differ are decoded whole, normalized
#   to the first stream's metadata format and re-sliced into chunks;
# * chained streams sharing one sample rate are streamed chunk by chunk,
#   converted to the format of the very first chunk when they differ.
#
# @param io_or_path [IO, String] input handed to the logical-stream reader
# @param chunk_size [Integer] frames per yielded chunk
# @param decode_mode [Object] forwarded unchanged to the decoders
# @yieldparam chunk [Core::SampleBuffer]
# @return [Boolean] false when the input has at most one logical stream
#   (nothing is yielded); otherwise true, or whatever the interleaved streamer returns
def stream_chained_vorbis_if_needed(io_or_path, chunk_size:, decode_mode:, &block)
  logical_streams, ogg_info = read_ogg_logical_stream_chains_from_input(io_or_path, with_info: true)
  return false if logical_streams.length <= 1

  interleaved = ogg_info[:interleaved_multistream]
  per_stream_metadata = logical_streams.map do |logical_stream|
    parse_single_logical_stream_metadata(StringIO.new(logical_stream.fetch(:bytes)))
  end
  output_format = per_stream_metadata.first.fetch(:format)

  if interleaved
    return stream_interleaved_vorbis_logical_streams_mixed!(
      logical_streams,
      chunk_size: chunk_size,
      decode_mode: decode_mode,
      target_format: output_format,
      stream_metadatas: per_stream_metadata, &block
    )
  end

  rates_match = per_stream_metadata.all? do |metadata|
    metadata.fetch(:format).sample_rate == output_format.sample_rate
  end

  if rates_match
    pinned_format = nil
    logical_streams.each do |logical_stream|
      source = StringIO.new(logical_stream.fetch(:bytes))
      stream_read(source, chunk_size: chunk_size, decode_mode: decode_mode) do |chunk|
        # The first chunk ever seen fixes the format of everything yielded.
        pinned_format ||= chunk.format
        yield(chunk.format == pinned_format ? chunk : chunk.convert(pinned_format))
      end
    end
  else
    logical_streams.each do |logical_stream|
      decoded = read(StringIO.new(logical_stream.fetch(:bytes)), decode_mode: decode_mode)
      normalized = normalize_vorbis_logical_stream_buffer_for_target(decoded, output_format)
      each_sample_buffer_frame_slice(normalized, chunk_size, &block)
    end
  end

  true
end
856
+
857
# Joins the decoded buffers of chained logical streams end to end.
#
# Every buffer, the first one included, is brought to the resolved target
# format through normalize_vorbis_logical_stream_buffer_for_target, so a
# sample-rate difference is handled the same way for all streams. (The first
# buffer used to be passed to a bare +convert+, unlike the rest.)
#
# @param buffers [Array<Core::SampleBuffer>] decoded logical streams, in playback order
# @param target_format [Core::Format, nil] output format; defaults to the first buffer's format
# @return [Core::SampleBuffer] the concatenation in the resolved target format
# @raise [InvalidFormatError] when +buffers+ is empty or holds a non-SampleBuffer
# @raise [InvalidParameterError] when +target_format+ is neither nil nor a Core::Format
def concatenate_vorbis_sample_buffers(buffers, target_format: nil)
  buffers = Array(buffers)
  raise InvalidFormatError, "OGG Vorbis chained decode did not produce any logical streams" if buffers.empty?

  first = buffers.first
  raise InvalidFormatError, "OGG Vorbis chained decode expected SampleBuffer outputs" unless first.is_a?(Core::SampleBuffer)
  raise InvalidParameterError, "target_format must be Core::Format" if !target_format.nil? && !target_format.is_a?(Core::Format)

  resolved_target_format = target_format || first.format
  # Same normalization path as the remaining buffers below.
  combined = normalize_vorbis_logical_stream_buffer_for_target(first, resolved_target_format)

  buffers.drop(1).reduce(combined) do |combined_buffer, buffer|
    raise InvalidFormatError, "OGG Vorbis chained decode expected SampleBuffer outputs" unless buffer.is_a?(Core::SampleBuffer)

    converted = normalize_vorbis_logical_stream_buffer_for_target(buffer, resolved_target_format)
    combined_buffer.concat(converted)
  end
end
875
+
876
# Mixes several decoded buffers into one by summing samples position by position.
#
# All inputs are first normalized to a 32-bit float variant of the output
# format. Shorter inputs contribute nothing past their end. The summed
# samples are limited to the range -1.0..1.0 before the result is converted
# to the output format.
#
# @param buffers [Array<Core::SampleBuffer>] buffers to mix
# @param target_format [Core::Format, nil] output format; defaults to the first buffer's format
# @return [Core::SampleBuffer] the mix in the resolved output format
# @raise [InvalidFormatError] when +buffers+ is empty or its first entry is not a SampleBuffer
# @raise [InvalidParameterError] when +target_format+ is neither nil nor a Core::Format
def mix_vorbis_sample_buffers(buffers, target_format: nil)
  inputs = Array(buffers)
  raise InvalidFormatError, "OGG Vorbis multi-stream decode did not produce any logical streams" if inputs.empty?

  lead = inputs.first
  raise InvalidFormatError, "OGG Vorbis multi-stream decode expected SampleBuffer outputs" unless lead.is_a?(Core::SampleBuffer)
  unless target_format.nil? || target_format.is_a?(Core::Format)
    raise InvalidParameterError, "target_format must be Core::Format"
  end

  output_format = target_format || lead.format
  float_format = output_format.with(sample_format: :float, bit_depth: 32)

  normalized = inputs.map do |input|
    normalize_vorbis_logical_stream_buffer_for_target(input, float_format)
  end
  longest_frame_count = normalized.map(&:sample_frame_count).max || 0
  accumulator = Array.new(longest_frame_count * float_format.channels, 0.0)

  normalized.each do |input|
    input.samples.each_with_index do |sample, position|
      accumulator[position] += sample.to_f
    end
  end
  accumulator.map! { |sum| [[sum, -1.0].max, 1.0].min }

  mixed = Core::SampleBuffer.new(accumulator, float_format)
  mixed.format == output_format ? mixed : mixed.convert(output_format)
end
905
+
906
# Brings a decoded buffer to +target_format+: resamples first when the sample
# rates differ, then converts when the formats still differ.
#
# @param buffer [Core::SampleBuffer]
# @param target_format [Core::Format]
# @return [Core::SampleBuffer] +buffer+ itself when it already has the target format
# @raise [InvalidParameterError] when either argument has the wrong type
def normalize_vorbis_logical_stream_buffer_for_target(buffer, target_format)
  raise InvalidParameterError, "buffer must be Core::SampleBuffer" unless buffer.is_a?(Core::SampleBuffer)
  raise InvalidParameterError, "target_format must be Core::Format" unless target_format.is_a?(Core::Format)

  rate_matched = buffer
  unless buffer.format.sample_rate == target_format.sample_rate
    rate_matched = resample_vorbis_sample_buffer(buffer, target_sample_rate: target_format.sample_rate)
  end
  return rate_matched if rate_matched.format == target_format

  rate_matched.convert(target_format)
end
917
+
918
# Computes how many frames a stream has after changing its sample rate.
#
# The result is frame_count * target / source, rounded to the nearest Integer.
#
# @param frame_count [Integer, #to_int, String] number of source frames (coerced with Integer())
# @param source_sample_rate [Integer] rate the frames were recorded at
# @param target_sample_rate [Integer] rate to convert to
# @return [Integer] frame count at the target rate
# @raise [InvalidParameterError] for a negative frame count or a non-positive rate
def resampled_vorbis_sample_frame_count(frame_count, source_sample_rate:, target_sample_rate:)
  frames = Integer(frame_count)
  from_rate = Integer(source_sample_rate)
  to_rate = Integer(target_sample_rate)
  raise InvalidParameterError, "frame_count must be non-negative" if frames.negative?
  raise InvalidParameterError, "source_sample_rate must be positive" unless from_rate.positive?
  raise InvalidParameterError, "target_sample_rate must be positive" unless to_rate.positive?

  # Nothing to scale: identical rates, or no frames at all.
  return frames if from_rate == to_rate || frames.zero?

  ((frames * to_rate.to_f) / from_rate).round
end
931
+
932
# Resamples a whole buffer to +target_sample_rate+ using linear interpolation.
#
# The buffer is converted to 32-bit float, split into one sample list per
# channel, interpolated channel by channel and interleaved again. Output
# positions at or beyond the last source frame repeat that last frame.
#
# @param buffer [Core::SampleBuffer]
# @param target_sample_rate [Integer]
# @return [Core::SampleBuffer] +buffer+ itself when the rate already matches,
#   otherwise a 32-bit float buffer at the target rate
# @raise [InvalidParameterError] when +buffer+ is not a Core::SampleBuffer
def resample_vorbis_sample_buffer(buffer, target_sample_rate:)
  raise InvalidParameterError, "buffer must be Core::SampleBuffer" unless buffer.is_a?(Core::SampleBuffer)

  input_format = buffer.format
  input_rate = input_format.sample_rate
  output_rate = Integer(target_sample_rate)
  return buffer if input_rate == output_rate

  float_format = input_format.with(sample_format: :float, bit_depth: 32)
  float_buffer = buffer.format == float_format ? buffer : buffer.convert(float_format)
  channel_count = float_format.channels
  output_frames = resampled_vorbis_sample_frame_count(
    float_buffer.sample_frame_count,
    source_sample_rate: input_rate,
    target_sample_rate: output_rate
  )
  return Core::SampleBuffer.new([], float_format.with(sample_rate: output_rate)) if output_frames.zero?

  # De-interleave: one list of Float samples per channel.
  per_channel = Array.new(channel_count) { [] }
  float_buffer.samples.each_slice(channel_count) do |frame|
    per_channel.each_with_index do |channel, channel_index|
      channel << frame.fetch(channel_index).to_f
    end
  end

  resampled = per_channel.map do |channel|
    case channel.length
    when 0
      Array.new(output_frames, 0.0)
    when 1
      Array.new(output_frames, channel.first.to_f)
    else
      last_index = channel.length - 1
      Array.new(output_frames) do |output_index|
        position = (output_index * input_rate.to_f) / output_rate
        lower = [position.floor, 0].max
        # Past the last interpolation interval: hold the final sample.
        next channel.last.to_f if lower >= last_index

        low = channel.fetch(lower).to_f
        high = channel.fetch(lower + 1).to_f
        low + ((high - low) * (position - lower))
      end
    end
  end

  # Re-interleave: frame by frame, channel by channel.
  interleaved = resampled.transpose.flatten(1)
  Core::SampleBuffer.new(interleaved, float_format.with(sample_rate: output_rate))
end
988
+
989
# Streams interleaved logical Vorbis streams by pulling one chunk from every
# stream per round and yielding the mix of the chunks obtained.
#
# When metadata is supplied and the streams do not all share the target
# sample rate, the work is delegated to the resampling variant.
#
# The format of the first chunk yielded is pinned and used for every later
# chunk. Previously each round was mixed in the format of the first stream
# that was still active, so the yielded format could change once an earlier
# stream ran out.
#
# @param chained_streams [Array<Hash>] logical stream descriptors; each needs :bytes
# @param chunk_size [Integer] forwarded to +stream_read+
# @param decode_mode [Object] forwarded to +stream_read+
# @param target_format [Core::Format, nil] used for the sample-rate comparison and
#   passed to the resampling variant; defaults to the first metadata's :format
# @param stream_metadatas [Array<Hash>, nil] optional per-stream metadata, parallel to the streams
# @yieldparam chunk [Core::SampleBuffer] one mixed chunk per round
# @return [true, Enumerator] an Enumerator when no block is given
# @raise [InvalidFormatError] when there are no streams or the metadata count differs
def stream_interleaved_vorbis_logical_streams_mixed!(
  chained_streams,
  chunk_size:,
  decode_mode:,
  target_format: nil,
  stream_metadatas: nil, &block
)
  unless block_given?
    return enum_for(
      __method__,
      chained_streams,
      chunk_size: chunk_size,
      decode_mode: decode_mode,
      target_format: target_format,
      stream_metadatas: stream_metadatas
    )
  end

  streams = Array(chained_streams)
  raise InvalidFormatError, "OGG Vorbis interleaved stream decode requires logical streams" if streams.empty?

  metadatas = stream_metadatas ? Array(stream_metadatas) : nil
  if metadatas && metadatas.length != streams.length
    raise InvalidFormatError, "OGG Vorbis interleaved stream metadata count mismatch"
  end

  if metadatas
    resolved_target_format = target_format || metadatas.first.fetch(:format)
    same_sample_rate = metadatas.all? do |metadata|
      metadata.fetch(:format).sample_rate == resolved_target_format.sample_rate
    end
    unless same_sample_rate
      return stream_interleaved_vorbis_logical_streams_mixed_resampled!(
        streams,
        stream_metadatas: metadatas,
        target_format: resolved_target_format,
        chunk_size: chunk_size,
        decode_mode: decode_mode, &block
      )
    end
  end

  # Called without a block so each stream can be advanced independently.
  enumerators = streams.map do |stream|
    stream_read(
      StringIO.new(stream.fetch(:bytes)),
      chunk_size: chunk_size,
      decode_mode: decode_mode
    )
  end

  output_format = nil
  loop do
    chunks = enumerators.map do |enumerator|
      enumerator.next
    rescue StopIteration
      nil # this stream is exhausted; the others may still have data
    end
    active_chunks = chunks.compact
    break if active_chunks.empty?

    # nil on the first round lets the mixer pick the first chunk's format;
    # afterwards the pinned format keeps every yielded chunk uniform.
    mixed = mix_vorbis_sample_buffers(active_chunks, target_format: output_format)
    output_format ||= mixed.format
    yield mixed
  end

  true
end
1052
+
1053
# Streams interleaved logical Vorbis streams whose sample rates differ.
#
# Each stream gets its own state Hash holding a chunk enumerator, a queue of
# already-normalized samples and a lazily created resampler. Every round tops
# up each queue, then takes up to +chunk_size+ frames from every queue and
# yields their mix as a 32-bit float buffer in the target layout.
#
# @param chained_streams [Array<Hash>] logical stream descriptors; each needs :bytes
# @param stream_metadatas [Array<Hash>] must have one entry per stream (only the count is checked)
# @param target_format [Core::Format] format whose 32-bit float variant is produced
# @param chunk_size [Integer] frames per yielded chunk
# @param decode_mode [Object] forwarded to +stream_read+
# @yieldparam chunk [Core::SampleBuffer]
# @return [true, Enumerator] an Enumerator when no block is given
# @raise [InvalidFormatError] on a count mismatch, no streams, or a stalled round
def stream_interleaved_vorbis_logical_streams_mixed_resampled!(
  chained_streams,
  stream_metadatas:,
  target_format:,
  chunk_size:,
  decode_mode:
)
  unless block_given?
    return to_enum(
      __method__,
      chained_streams,
      stream_metadatas: stream_metadatas,
      target_format: target_format,
      chunk_size: chunk_size,
      decode_mode: decode_mode
    )
  end

  logical_streams = Array(chained_streams)
  metadata_list = Array(stream_metadatas)
  unless logical_streams.length == metadata_list.length
    raise InvalidFormatError, "OGG Vorbis interleaved stream metadata count mismatch"
  end
  raise InvalidFormatError, "OGG Vorbis interleaved stream decode requires logical streams" if logical_streams.empty?

  mix_format = target_format.with(sample_format: :float, bit_depth: 32)
  states = logical_streams.map do |logical_stream|
    {
      enumerator: stream_read(
        StringIO.new(logical_stream.fetch(:bytes)),
        chunk_size: chunk_size,
        decode_mode: decode_mode
      ),
      source_eof: false,
      pending_samples: [],
      target_work_format: mix_format,
      resampler_initialized: false,
      resampler: nil
    }
  end

  loop do
    # map (not any?) so that every stream is topped up each round.
    progress_flags = states.map do |state|
      ensure_vorbis_interleaved_stream_pending_frames!(state, min_frames: chunk_size)
    end
    advanced = progress_flags.any?

    queued_frames = states.map do |state|
      state.fetch(:pending_samples).length / mix_format.channels
    end

    if queued_frames.any? { |count| count >= chunk_size }
      frames_to_emit = chunk_size
    elsif states.all? { |state| vorbis_interleaved_stream_state_source_drained?(state) }
      # Every source is finished: flush whatever is left, then stop.
      frames_to_emit = queued_frames.max || 0
      break if frames_to_emit.zero?
    elsif advanced
      next
    else
      raise InvalidFormatError, "interleaved Vorbis streaming resampler made no progress"
    end

    slices = states.map do |state|
      take_vorbis_interleaved_stream_pending_chunk!(state, frame_count: frames_to_emit)
    end
    yield mix_vorbis_sample_buffers(slices.compact, target_format: mix_format)
  end

  true
end
1124
+
1125
# Tops up one stream's pending-sample queue until it holds at least
# +min_frames+ frames or the stream can deliver nothing more.
#
# Each pass first drains the stream's resampler (if one exists); only when
# that yields nothing is the next decoded chunk pulled from the enumerator.
# Reaching the end of the enumerator marks the state as EOF and finalizes
# the resampler so its tail can still be drained on the next pass.
#
# @param stream_state [Hash] per-stream state (mutated in place)
# @param min_frames [Integer] number of queued frames to aim for
# @return [Boolean] whether anything changed (samples queued or EOF recorded)
def ensure_vorbis_interleaved_stream_pending_frames!(stream_state, min_frames:)
  advanced = false
  mix_format = stream_state.fetch(:target_work_format)
  queue = stream_state.fetch(:pending_samples)
  queued_frames = -> { queue.length / mix_format.channels }

  while (queued = queued_frames.call) < min_frames
    resampler = stream_state[:resampler]
    if resampler
      drained = drain_vorbis_streaming_linear_resampler_chunk!(resampler, max_frames: (min_frames - queued))
      if drained
        queue.concat((drained.format == mix_format ? drained : drained.convert(mix_format)).samples)
        advanced = true
        next
      end
    end

    break if stream_state[:source_eof]

    begin
      decoded_chunk = stream_state.fetch(:enumerator).next
      append_vorbis_interleaved_stream_pending_output_chunk!(stream_state, decoded_chunk)
    rescue StopIteration
      stream_state[:source_eof] = true
      finish_vorbis_streaming_linear_resampler!(resampler) if resampler
    end
    # Both a new chunk and a newly detected EOF count as progress.
    advanced = true
  end

  advanced
end
1162
+
1163
# Queues one decoded chunk for mixing, resampling it when necessary.
#
# The first chunk seen for a stream decides whether a resampler is needed:
# the builder returns nil when the chunk's rate already equals the target
# rate. With a resampler, the chunk is fed in and everything it can currently
# produce is queued; without one, the chunk is normalized and queued directly.
#
# @param stream_state [Hash] per-stream state (mutated in place)
# @param chunk [Core::SampleBuffer] freshly decoded chunk
# @return [nil]
# @raise [InvalidParameterError] when +chunk+ is not a Core::SampleBuffer
def append_vorbis_interleaved_stream_pending_output_chunk!(stream_state, chunk)
  raise InvalidParameterError, "chunk must be Core::SampleBuffer" unless chunk.is_a?(Core::SampleBuffer)

  mix_format = stream_state.fetch(:target_work_format)
  queue = stream_state.fetch(:pending_samples)

  unless stream_state[:resampler_initialized]
    stream_state[:resampler] = build_vorbis_streaming_linear_resampler_state(
      source_format: chunk.format,
      target_sample_rate: mix_format.sample_rate
    )
    stream_state[:resampler_initialized] = true
  end

  resampler = stream_state[:resampler]
  unless resampler
    passthrough = if chunk.format == mix_format
                    chunk
                  else
                    normalize_vorbis_logical_stream_buffer_for_target(chunk, mix_format)
                  end
    queue.concat(passthrough.samples)
    return nil
  end

  feed_vorbis_streaming_linear_resampler_chunk!(resampler, chunk)
  while (drained = drain_vorbis_streaming_linear_resampler_chunk!(resampler, max_frames: nil))
    queue.concat((drained.format == mix_format ? drained : drained.convert(mix_format)).samples)
  end
  nil
end
1195
+
1196
# Removes up to +frame_count+ frames from the front of a stream's pending
# queue and wraps them in a buffer.
#
# @param stream_state [Hash] per-stream state; its :pending_samples array is mutated
# @param frame_count [Integer] maximum number of frames to take
# @return [Core::SampleBuffer, nil] nil when no frame can be taken
# @raise [InvalidParameterError] when +frame_count+ is negative
def take_vorbis_interleaved_stream_pending_chunk!(stream_state, frame_count:)
  requested = Integer(frame_count)
  raise InvalidParameterError, "frame_count must be non-negative" if requested.negative?

  queue = stream_state.fetch(:pending_samples)
  mix_format = stream_state.fetch(:target_work_format)
  channel_count = mix_format.channels
  frames = [requested, queue.length / channel_count].min
  return nil if frames.zero?

  Core::SampleBuffer.new(queue.slice!(0, frames * channel_count), mix_format)
end
1210
+
1211
# Tells whether a stream can produce no further samples: its enumerator has
# ended and its resampler, if it has one, reports being finished.
#
# @param stream_state [Hash] per-stream state
# @return [Boolean]
def vorbis_interleaved_stream_state_source_drained?(stream_state)
  return false unless stream_state[:source_eof]

  resampler = stream_state[:resampler]
  return true if resampler.nil?

  vorbis_streaming_linear_resampler_finished?(resampler)
end
1217
+
1218
# Creates the state Hash used by the streaming linear resampler.
#
# @param source_format [Core::Format] format of the chunks that will be fed in
# @param target_sample_rate [Integer] rate to produce (coerced with Integer())
# @return [Hash, nil] nil when the source already has the target rate, i.e.
#   no resampling is needed
# @raise [InvalidParameterError] when +source_format+ is not a Core::Format
def build_vorbis_streaming_linear_resampler_state(source_format:, target_sample_rate:)
  raise InvalidParameterError, "source_format must be Core::Format" unless source_format.is_a?(Core::Format)

  output_rate = Integer(target_sample_rate)
  return nil if source_format.sample_rate == output_rate

  float_source_format = source_format.with(sample_format: :float, bit_depth: 32)
  float_target_format = float_source_format.with(sample_rate: output_rate)

  {
    source_work_format: float_source_format,
    target_work_format: float_target_format,
    source_sample_rate: float_source_format.sample_rate,
    target_sample_rate: output_rate,
    channels: float_source_format.channels,
    # Source samples still needed; index 0 corresponds to source_buffer_start_frame.
    source_buffer_samples: [],
    source_buffer_start_frame: 0,
    total_source_frames: 0,
    next_target_frame_index: 0,
    source_eof: false,
    final_target_frame_count: nil
  }
end
1239
+
1240
# Appends one decoded chunk to the resampler's source buffer.
#
# The chunk is converted to the resampler's source work format when it
# differs, its samples are stored as Floats and the running source frame
# total is increased.
#
# @param state [Hash] resampler state (mutated in place)
# @param chunk [Core::SampleBuffer] chunk at the resampler's source sample rate
# @return [nil]
# @raise [InvalidParameterError] when an argument has the wrong type
# @raise [InvalidFormatError] when the chunk's sample rate is not the source rate
def feed_vorbis_streaming_linear_resampler_chunk!(state, chunk)
  raise InvalidParameterError, "state must be a resampler state Hash" unless state.is_a?(Hash)
  raise InvalidParameterError, "chunk must be Core::SampleBuffer" unless chunk.is_a?(Core::SampleBuffer)

  expected_format = state.fetch(:source_work_format)
  unless chunk.format.sample_rate == expected_format.sample_rate
    raise InvalidFormatError,
          "streaming resampler source sample rate mismatch " \
          "(expected #{expected_format.sample_rate}, got #{chunk.format.sample_rate})"
  end

  incoming = chunk.format == expected_format ? chunk : chunk.convert(expected_format)
  float_samples = incoming.samples.map(&:to_f)
  state.fetch(:source_buffer_samples).concat(float_samples)
  state[:total_source_frames] += incoming.sample_frame_count
  nil
end
1256
+
1257
# Marks the resampler's source as complete and fixes the total number of
# output frames it will produce, derived from the source frames fed so far.
# Calling it again after the source was marked complete changes nothing.
#
# @param state [Hash] resampler state (mutated in place)
# @return [nil]
# @raise [InvalidParameterError] when +state+ is not a Hash
def finish_vorbis_streaming_linear_resampler!(state)
  raise InvalidParameterError, "state must be a resampler state Hash" unless state.is_a?(Hash)

  unless state[:source_eof]
    state[:source_eof] = true
    state[:final_target_frame_count] = resampled_vorbis_sample_frame_count(
      state.fetch(:total_source_frames),
      source_sample_rate: state.fetch(:source_sample_rate),
      target_sample_rate: state.fetch(:target_sample_rate)
    )
  end

  nil
end
1269
+
1270
# Tells whether the resampler has emitted every output frame it will ever
# produce: the source is complete, the final output frame count is known and
# the next output index has reached it.
#
# @param state [Object] resampler state; anything but a Hash counts as not finished
# @return [Boolean]
def vorbis_streaming_linear_resampler_finished?(state)
  return false unless state.is_a?(Hash) && state[:source_eof]

  expected_total = state[:final_target_frame_count]
  return false if expected_total.nil?

  state.fetch(:next_target_frame_index) >= expected_total
end
1277
+
1278
# Produces as many linearly interpolated output frames as the buffered source
# allows, up to +max_frames+.
#
# While the source is still open, an output frame is only produced when both
# neighbouring source frames have been fed. Once the source is complete,
# positions at or past the last source frame repeat that frame, and output
# stops at the final output frame count. The source buffer is compacted after
# every call.
#
# @param state [Hash] resampler state (mutated in place)
# @param max_frames [Integer, nil] output frame limit; nil means no limit
# @return [Core::SampleBuffer, nil] nil when no frame could be produced
# @raise [InvalidParameterError] when +state+ is not a Hash or +max_frames+ is negative
def drain_vorbis_streaming_linear_resampler_chunk!(state, max_frames:)
  raise InvalidParameterError, "state must be a resampler state Hash" unless state.is_a?(Hash)

  channel_count = state.fetch(:channels)
  input_rate = state.fetch(:source_sample_rate)
  output_rate = state.fetch(:target_sample_rate)
  fed_frames = state.fetch(:total_source_frames)
  if fed_frames.zero?
    return nil unless state[:source_eof]
    return nil if state.fetch(:final_target_frame_count).to_i.zero?
  end

  frame_budget =
    if max_frames.nil?
      Float::INFINITY
    else
      requested = Integer(max_frames)
      raise InvalidParameterError, "max_frames must be non-negative" if requested.negative?
      return nil if requested.zero?

      requested
    end

  frame_cap = state[:final_target_frame_count]
  produced = []
  emitted = 0

  while emitted < frame_budget
    output_index = state.fetch(:next_target_frame_index)
    break if !frame_cap.nil? && output_index >= frame_cap
    break if fed_frames.zero?

    position = (output_index * input_rate.to_f) / output_rate
    lower = [position.floor, 0].max
    # The right-hand neighbour has not been fed yet: wait for more source.
    break if !state[:source_eof] && (lower + 1) >= fed_frames

    last_frame = fed_frames - 1
    if lower >= last_frame
      lower = last_frame
      upper = lower
      weight = 0.0
    else
      upper = lower + 1
      weight = position - lower
    end

    channel_count.times do |channel_index|
      low = vorbis_streaming_linear_resampler_source_sample(state, lower, channel_index)
      if upper == lower
        produced << low
      else
        high = vorbis_streaming_linear_resampler_source_sample(state, upper, channel_index)
        produced << (low + ((high - low) * weight))
      end
    end

    state[:next_target_frame_index] = output_index + 1
    emitted += 1
  end

  compact_vorbis_streaming_linear_resampler_source_buffer!(state)
  return nil if produced.empty?

  Core::SampleBuffer.new(produced, state.fetch(:target_work_format))
end
1342
+
1343
# Looks up one buffered source sample by absolute frame index and channel.
#
# The buffer only holds frames from :source_buffer_start_frame onwards, so
# the absolute index is translated to a buffer-local one first.
#
# @param state [Hash] resampler state
# @param absolute_frame_index [Integer] frame index counted from the start of the stream
# @param channel_index [Integer] channel within the frame
# @return [Float]
# @raise [InvalidFormatError] when the frame was already dropped or is not buffered yet
def vorbis_streaming_linear_resampler_source_sample(state, absolute_frame_index, channel_index)
  channel_count = state.fetch(:channels)
  local_frame = absolute_frame_index - state.fetch(:source_buffer_start_frame)
  raise InvalidFormatError, "streaming resampler source buffer underflow" if local_frame.negative?

  value = state.fetch(:source_buffer_samples)[(local_frame * channel_count) + channel_index]
  raise InvalidFormatError, "streaming resampler source buffer overflow" if value.nil?

  value.to_f
end
1355
+
1356
# Drops buffered source frames that no future output frame will read.
#
# The first frame still needed is the one at or just before the source
# position of the next output frame. While the source is open, the most
# recently fed frame is always kept; once the resampler is finished,
# everything is dropped.
#
# @param state [Hash] resampler state; its buffer and start frame are mutated
# @return [nil]
def compact_vorbis_streaming_linear_resampler_source_buffer!(state)
  fed_frames = state.fetch(:total_source_frames)
  return nil if fed_frames.zero?

  if vorbis_streaming_linear_resampler_finished?(state)
    first_needed = fed_frames
  else
    input_rate = state.fetch(:source_sample_rate)
    output_rate = state.fetch(:target_sample_rate)
    position = (state.fetch(:next_target_frame_index) * input_rate.to_f) / output_rate
    first_needed = [position.floor, 0].max
  end

  # An open source must keep its newest frame as a future left neighbour.
  ceiling = state[:source_eof] ? fed_frames : fed_frames - 1
  first_needed = [first_needed, ceiling, fed_frames].min

  surplus_frames = first_needed - state.fetch(:source_buffer_start_frame)
  return nil unless surplus_frames.positive?

  state.fetch(:source_buffer_samples).shift(surplus_frames * state.fetch(:channels))
  state[:source_buffer_start_frame] += surplus_frames
  nil
end
1383
+
1384
# Yields consecutive slices of +buffer+, each at most +chunk_size+ frames long.
#
# @param buffer [Core::SampleBuffer] buffer to cut up
# @param chunk_size [Integer] positive number of frames per slice
# @yieldparam slice [Core::SampleBuffer] result of +buffer.slice(offset, length)+
# @return [nil, Enumerator] an Enumerator when no block is given
# @raise [InvalidParameterError] when an argument is invalid
def each_sample_buffer_frame_slice(buffer, chunk_size)
  return enum_for(__method__, buffer, chunk_size) unless block_given?

  raise InvalidParameterError, "buffer must be Core::SampleBuffer" unless buffer.is_a?(Core::SampleBuffer)
  unless chunk_size.is_a?(Integer) && chunk_size.positive?
    raise InvalidParameterError, "chunk_size must be a positive Integer"
  end

  frame_total = buffer.sample_frame_count
  (0...frame_total).step(chunk_size) do |offset|
    # The last slice may be shorter than chunk_size.
    yield buffer.slice(offset, [chunk_size, frame_total - offset].min)
  end

  nil
end
1400
+
1401
+ # ---------------------------------------------------------------------------
1402
+ # Packet classification
1403
+ # ---------------------------------------------------------------------------
1404
+
1405
# Classifies a Vorbis packet by inspecting its first byte and signature.
#
# A packet whose first byte has bit 0 clear is audio. Otherwise it is one of
# the three header kinds when it is at least 7 bytes long, its first byte is
# the matching header type and bytes 1..6 equal VORBIS_SIGNATURE.
#
# @param packet [String, nil] raw packet bytes
# @return [Symbol] :audio, :identification_header, :comment_header,
#   :setup_header or :unknown
def classify_vorbis_packet(packet)
  return :unknown if packet.nil? || packet.empty?

  header_type = packet.getbyte(0)
  return :audio if header_type.nobits?(0x01)
  return :unknown unless packet.bytesize >= 7 && packet[1, 6] == VORBIS_SIGNATURE

  case header_type
  when IDENTIFICATION_HEADER_TYPE then :identification_header
  when COMMENT_HEADER_TYPE then :comment_header
  when SETUP_HEADER_TYPE then :setup_header
  else :unknown
  end
end
1418
+
1419
+ # ---------------------------------------------------------------------------
1420
+ # IO helpers
1421
+ # ---------------------------------------------------------------------------
1422
+
1423
# Resolves an input argument to a readable IO.
#
# @param io_or_path [#read, String] an object responding to +read+, or a file path
# @return [Array(IO, Boolean)] the IO and whether this method opened it
#   (true for paths, false for objects passed through)
# @raise [InvalidParameterError] when the argument is neither readable nor a String
# @raise [InvalidFormatError] when the path does not exist
def open_input(io_or_path)
  if io_or_path.respond_to?(:read)
    [io_or_path, false]
  elsif io_or_path.is_a?(String)
    [File.open(io_or_path, "rb"), true]
  else
    raise InvalidParameterError, "input path must be String or IO: #{io_or_path.inspect}"
  end
rescue Errno::ENOENT
  raise InvalidFormatError, "input file not found: #{io_or_path}"
end
1431
+
1432
# Resolves an output argument to a writable IO.
#
# @param io_or_path [#write, String] an object responding to +write+, or a file path
# @return [Array(IO, Boolean)] the IO and whether this method opened it
#   (true for paths, which are opened in binary write mode)
# @raise [InvalidParameterError] when the argument is neither writable nor a String
def open_output(io_or_path)
  if io_or_path.respond_to?(:write)
    [io_or_path, false]
  elsif io_or_path.is_a?(String)
    [File.open(io_or_path, "wb"), true]
  else
    raise InvalidParameterError, "output path must be String or IO: #{io_or_path.inspect}"
  end
end
1438
+
1439
# Verifies that +io+ responds to both +seek+ and +rewind+.
#
# @param io [Object] candidate IO
# @return [nil]
# @raise [StreamError] when either method is missing
def ensure_seekable!(io)
  seekable = %i[seek rewind].all? { |required| io.respond_to?(required) }
  raise StreamError, "OGG Vorbis codec requires seekable IO" unless seekable
end
1444
+ end
1445
+ end
1446
+ end
1447
+ end