wavify 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.serena/.gitignore +1 -0
- data/.serena/memories/project_overview.md +5 -0
- data/.serena/memories/style_and_completion.md +5 -0
- data/.serena/memories/suggested_commands.md +11 -0
- data/.serena/project.yml +126 -0
- data/.simplecov +18 -0
- data/.yardopts +4 -0
- data/CHANGELOG.md +11 -0
- data/LICENSE +21 -0
- data/README.md +196 -0
- data/Rakefile +190 -0
- data/benchmarks/README.md +46 -0
- data/benchmarks/benchmark_helper.rb +112 -0
- data/benchmarks/dsp_effects_benchmark.rb +46 -0
- data/benchmarks/flac_benchmark.rb +74 -0
- data/benchmarks/streaming_memory_benchmark.rb +94 -0
- data/benchmarks/wav_io_benchmark.rb +110 -0
- data/examples/audio_processing.rb +73 -0
- data/examples/cinematic_transition.rb +118 -0
- data/examples/drum_machine.rb +74 -0
- data/examples/format_convert.rb +81 -0
- data/examples/hybrid_arrangement.rb +165 -0
- data/examples/streaming_master_chain.rb +129 -0
- data/examples/synth_pad.rb +42 -0
- data/lib/wavify/audio.rb +483 -0
- data/lib/wavify/codecs/aiff.rb +338 -0
- data/lib/wavify/codecs/base.rb +108 -0
- data/lib/wavify/codecs/flac.rb +1322 -0
- data/lib/wavify/codecs/ogg_vorbis.rb +1447 -0
- data/lib/wavify/codecs/raw.rb +193 -0
- data/lib/wavify/codecs/registry.rb +87 -0
- data/lib/wavify/codecs/wav.rb +459 -0
- data/lib/wavify/core/duration.rb +99 -0
- data/lib/wavify/core/format.rb +133 -0
- data/lib/wavify/core/sample_buffer.rb +216 -0
- data/lib/wavify/core/stream.rb +129 -0
- data/lib/wavify/dsl.rb +537 -0
- data/lib/wavify/dsp/effects/chorus.rb +98 -0
- data/lib/wavify/dsp/effects/compressor.rb +85 -0
- data/lib/wavify/dsp/effects/delay.rb +69 -0
- data/lib/wavify/dsp/effects/distortion.rb +64 -0
- data/lib/wavify/dsp/effects/effect_base.rb +68 -0
- data/lib/wavify/dsp/effects/reverb.rb +112 -0
- data/lib/wavify/dsp/effects.rb +21 -0
- data/lib/wavify/dsp/envelope.rb +97 -0
- data/lib/wavify/dsp/filter.rb +271 -0
- data/lib/wavify/dsp/oscillator.rb +123 -0
- data/lib/wavify/errors.rb +34 -0
- data/lib/wavify/sequencer/engine.rb +278 -0
- data/lib/wavify/sequencer/note_sequence.rb +132 -0
- data/lib/wavify/sequencer/pattern.rb +102 -0
- data/lib/wavify/sequencer/track.rb +298 -0
- data/lib/wavify/sequencer.rb +12 -0
- data/lib/wavify/version.rb +6 -0
- data/lib/wavify.rb +28 -0
- data/tools/fixture_writer.rb +85 -0
- metadata +129 -0
|
@@ -0,0 +1,1447 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "stringio"
|
|
4
|
+
require "vorbis"
|
|
5
|
+
|
|
6
|
+
module Wavify
|
|
7
|
+
module Codecs
|
|
8
|
+
# OGG Vorbis codec backed by libogg (ogg-ruby) and libvorbis (vorbis-ruby).
|
|
9
|
+
#
|
|
10
|
+
# Container demuxing uses {Ogg::SyncState} and {Ogg::StreamState}. Audio
|
|
11
|
+
# decode uses libvorbis synthesis functions via {Vorbis::Native}. Audio
|
|
12
|
+
# encode uses {Vorbis::Encoder}.
|
|
13
|
+
#
|
|
14
|
+
# Sequential chained Vorbis logical streams are concatenated (and
|
|
15
|
+
# resampled to the first logical stream sample rate when rates differ).
|
|
16
|
+
# Interleaved multi-stream OGG logical streams are mixed with clipping.
|
|
17
|
+
class OggVorbis < Base
|
|
18
|
+
# Recognized filename extensions.
|
|
19
|
+
EXTENSIONS = %w[.ogg .oga].freeze
|
|
20
|
+
|
|
21
|
+
VORBIS_SIGNATURE = "vorbis" # :nodoc:
|
|
22
|
+
IDENTIFICATION_HEADER_TYPE = 0x01 # :nodoc:
|
|
23
|
+
COMMENT_HEADER_TYPE = 0x03 # :nodoc:
|
|
24
|
+
SETUP_HEADER_TYPE = 0x05 # :nodoc:
|
|
25
|
+
GRANULE_POSITION_UNKNOWN = 0xFFFF_FFFF_FFFF_FFFF # :nodoc:
|
|
26
|
+
VORBIS_ENCODE_DEFAULT_QUALITY = 0.4 # :nodoc:
|
|
27
|
+
|
|
28
|
+
class << self
|
|
29
|
+
# @param io_or_path [String, IO]
|
|
30
|
+
# @return [Boolean]
|
|
31
|
+
def can_read?(io_or_path)
  # A path is accepted purely on its (case-insensitive) extension.
  if io_or_path.is_a?(String)
    extension = File.extname(io_or_path).downcase
    return true if EXTENSIONS.include?(extension)
  end
  return false unless io_or_path.respond_to?(:read)

  # Readable objects are sniffed for the 4-byte OGG capture pattern and then
  # rewound (when supported) so later readers start from the beginning.
  signature = io_or_path.read(4)
  io_or_path.rewind if io_or_path.respond_to?(:rewind)
  signature == "OggS"
end
|
|
39
|
+
|
|
40
|
+
# Reads OGG Vorbis audio.
|
|
41
|
+
#
|
|
42
|
+
# @note Supports full Vorbis decode via libvorbis. Sequential chained
|
|
43
|
+
# OGG logical streams are concatenated and normalized to the first
|
|
44
|
+
# logical stream format (including resampling). Interleaved
|
|
45
|
+
# multi-stream OGG logical streams are mixed.
|
|
46
|
+
# `decode_mode:` is accepted for API compatibility but has no
|
|
47
|
+
# effect; libvorbis always performs full decode.
|
|
48
|
+
def read(io_or_path, format: nil, decode_mode: :strict)
  unless %i[strict placeholder].include?(decode_mode)
    raise InvalidParameterError, "decode_mode must be :strict or :placeholder, got #{decode_mode.inspect}"
  end

  # The chained/interleaved helper returns a truthy result only when it
  # handled the input itself; otherwise fall through to the single-stream path.
  chained = decode_chained_vorbis_read_if_needed(io_or_path, decode_mode: decode_mode, target_format: format)
  return chained if chained

  buffer = run_vorbis_decode_pipeline(build_vorbis_decode_context(io_or_path))
  format ? buffer.convert(format) : buffer
end
|
|
62
|
+
|
|
63
|
+
# Writes OGG Vorbis audio.
|
|
64
|
+
def write(io_or_path, sample_buffer, format:)
  unless sample_buffer.is_a?(Core::SampleBuffer)
    raise InvalidParameterError, "sample_buffer must be Core::SampleBuffer"
  end

  # One-shot write: push the whole buffer through the streaming writer as a
  # single chunk and return whatever stream_write returns.
  stream_write(io_or_path, format: format) { |emit| emit.call(sample_buffer) }
end
|
|
71
|
+
|
|
72
|
+
# Streams OGG Vorbis audio decoding.
|
|
73
|
+
#
|
|
74
|
+
# @note Supports full Vorbis decode via libvorbis. Sequential chained
|
|
75
|
+
# OGG logical streams are concatenated and normalized to the first
|
|
76
|
+
# logical stream format during streaming (including resampling).
|
|
77
|
+
# Interleaved multi-stream OGG logical streams are mixed.
|
|
78
|
+
# `decode_mode:` is accepted for API compatibility but has no effect.
|
|
79
|
+
def stream_read(io_or_path, chunk_size: 4096, decode_mode: :strict, &block)
  # Without a block, hand back a lazy enumerator over the same call.
  unless block
    return enum_for(__method__, io_or_path, chunk_size: chunk_size, decode_mode: decode_mode)
  end

  chunk_size_ok = chunk_size.is_a?(Integer) && chunk_size.positive?
  raise InvalidParameterError, "chunk_size must be a positive Integer" unless chunk_size_ok

  unless %i[strict placeholder].include?(decode_mode)
    raise InvalidParameterError, "decode_mode must be :strict or :placeholder, got #{decode_mode.inspect}"
  end

  # A truthy result means the chained/interleaved helper already streamed
  # every chunk to the block.
  handled = stream_chained_vorbis_if_needed(io_or_path, chunk_size: chunk_size, decode_mode: decode_mode, &block)
  return nil if handled

  context = build_vorbis_decode_context(io_or_path)
  run_vorbis_decode_pipeline(context, streaming: true, chunk_size: chunk_size, &block)
end
|
|
90
|
+
|
|
91
|
+
# Streams OGG Vorbis audio encoding via {Vorbis::Encoder}.
|
|
92
|
+
#
|
|
93
|
+
# @note Encodes using libvorbis at the default VBR quality level.
|
|
94
|
+
# Accepts any channel count and sample rate supported by libvorbis.
|
|
95
|
+
def stream_write(io_or_path, format:)
  # Streams SampleBuffer chunks through Vorbis::Encoder into +io_or_path+.
  #
  # Yields a lambda; each call takes one Core::SampleBuffer, converts it to
  # the 32-bit float target format when needed, de-interleaves it per channel
  # and writes the resulting OGG pages to the output.
  #
  # @param io_or_path [String, IO] destination accepted by open_output
  # @param format [Core::Format] supplies channel count and sample rate
  # @return [String, IO, Enumerator] io_or_path, or an Enumerator without a block
  # @raise [InvalidParameterError] on an invalid format or chunk
  return enum_for(__method__, io_or_path, format: format) unless block_given?
  raise InvalidParameterError, "format must be Core::Format" unless format.is_a?(Core::Format)
  raise InvalidParameterError, "Vorbis encode requires positive channel count" unless format.channels.to_i.positive?
  raise InvalidParameterError, "Vorbis encode requires positive sample_rate" unless format.sample_rate.to_i.positive?

  target_format = Core::Format.new(
    channels: format.channels,
    sample_rate: format.sample_rate,
    bit_depth: 32,
    sample_format: :float
  )

  io, close_io = open_output(io_or_path)
  # Start from a clean slate when the output supports it.
  io.rewind if io.respond_to?(:rewind)
  io.truncate(0) if io.respond_to?(:truncate)

  encoder_closed = false
  encoder = Vorbis::Encoder.new(
    channels: target_format.channels,
    rate: target_format.sample_rate,
    quality: VORBIS_ENCODE_DEFAULT_QUALITY
  )

  encoder.write_headers { |page_bytes| io.write(page_bytes) }

  writer = lambda do |chunk|
    raise InvalidParameterError, "stream chunk must be Core::SampleBuffer" unless chunk.is_a?(Core::SampleBuffer)

    buffer = chunk.format == target_format ? chunk : chunk.convert(target_format)
    next nil if buffer.sample_frame_count.zero?

    # De-interleave: one Float array per channel, as passed to encoder.encode.
    channels_data = Array.new(target_format.channels) { [] }
    buffer.samples.each_slice(target_format.channels) do |frame|
      frame.each_with_index { |sample, ch| channels_data[ch] << sample.to_f }
    end
    encoder.encode(channels_data) { |page_bytes| io.write(page_bytes) }
  end

  yield writer

  encoder.finish { |page_bytes| io.write(page_bytes) }
  encoder.close
  # From here on the encoder is released; a later I/O failure must not close
  # it a second time from the rescue path.
  encoder_closed = true
  io.flush if io.respond_to?(:flush)
  io.rewind if io.respond_to?(:rewind)
  io_or_path
rescue StandardError
  unless encoder_closed
    begin
      # Best-effort release; the original error is what gets re-raised.
      encoder&.close
    rescue StandardError
      nil
    end
  end
  raise
ensure
  io.close if close_io && io
end
|
|
150
|
+
|
|
151
|
+
# Parses OGG/Vorbis headers and returns metadata without audio decode.
|
|
152
|
+
#
|
|
153
|
+
# @param io_or_path [String, IO]
|
|
154
|
+
# @return [Hash]
|
|
155
|
+
def metadata(io_or_path)
  io, close_io = open_input(io_or_path)
  ensure_seekable!(io)
  logical_streams, physical_info = read_ogg_logical_stream_chains(io)

  # Single logical stream: parse it directly.
  if logical_streams.length <= 1
    only_stream = logical_streams.fetch(0)
    return parse_single_logical_stream_metadata(StringIO.new(only_stream.fetch(:bytes)))
  end

  # Several logical streams: parse each one, then merge according to whether
  # their pages were interleaved or strictly sequential.
  per_stream = logical_streams.map do |logical_stream|
    parse_single_logical_stream_metadata(StringIO.new(logical_stream.fetch(:bytes)))
  end

  if physical_info[:interleaved_multistream]
    merge_interleaved_vorbis_metadata(per_stream, logical_streams, physical_info)
  else
    merge_chained_vorbis_metadata(per_stream, logical_streams)
  end
ensure
  io.close if close_io && io
end
|
|
175
|
+
|
|
176
|
+
private
|
|
177
|
+
|
|
178
|
+
# ---------------------------------------------------------------------------
|
|
179
|
+
# OGG container reading (using ogg-ruby)
|
|
180
|
+
# ---------------------------------------------------------------------------
|
|
181
|
+
|
|
182
|
+
def read_ogg_logical_stream_chains_from_input(io_or_path, with_info: false)
  io, close_io = open_input(io_or_path)
  ensure_seekable!(io)

  # Callers get just the logical streams unless they also ask for the
  # physical-container statistics.
  logical_streams, container_info = read_ogg_logical_stream_chains(io)
  return logical_streams unless with_info

  [logical_streams, container_info]
ensure
  io.close if close_io && io
end
|
|
191
|
+
|
|
192
|
+
def read_ogg_logical_stream_chains(io)
  # Splits a physical OGG bitstream into per-serial logical streams.
  # Returns [streams, physical_info]: streams are frozen hashes ordered by
  # first page, physical_info holds container-wide page statistics.
  io.rewind
  sync = Ogg::SyncState.new
  streams_by_serial = {}
  total_pages = 0
  total_bos = 0
  total_eos = 0
  total_continued = 0
  page_index = 0

  loop do
    chunk = io.read(4096)
    sync.write(chunk) if chunk

    while (page = sync.pageout)
      serial = page.serialno
      bos = page.bos?
      eos = page.eos?
      continued = page.continued?

      raise InvalidFormatError, "first OGG page must have BOS flag" if page_index.zero? && !bos

      stream = streams_by_serial[serial]
      if stream
        raise InvalidFormatError, "unexpected OGG page after EOS in logical stream" if stream[:eos_seen]
      else
        raise InvalidFormatError, "first page of OGG logical stream must have BOS flag" unless bos

        stream = {
          serial_number: serial,
          bytes: +"",
          page_count: 0,
          bos_page_count: 0,
          eos_page_count: 0,
          continued_page_count: 0,
          eos_seen: false,
          first_physical_page_index: page_index,
          last_physical_page_index: page_index,
          physical_page_indices: []
        }
        streams_by_serial[serial] = stream
      end

      raise InvalidFormatError, "unexpected BOS page in OGG logical stream" if stream[:page_count].positive? && bos

      # Per-stream bookkeeping.
      stream[:bytes] << page.to_s
      stream[:page_count] += 1
      stream[:last_physical_page_index] = page_index
      stream[:physical_page_indices] << page_index

      # Flag counters, per stream and container-wide.
      if bos
        stream[:bos_page_count] += 1
        total_bos += 1
      end
      if eos
        stream[:eos_page_count] += 1
        stream[:eos_seen] = true
        total_eos += 1
      end
      if continued
        stream[:continued_page_count] += 1
        total_continued += 1
      end

      total_pages += 1
      page_index += 1
    end

    break if chunk.nil?
  end

  ordered = streams_by_serial.values.sort_by { |entry| entry[:first_physical_page_index] }
  raise InvalidFormatError, "empty OGG bitstream" if ordered.empty?

  # Two logical streams are interleaved when their physical page ranges overlap.
  overlapping_pairs = []
  ordered.each_with_index do |earlier, position|
    ordered.drop(position + 1).each do |later|
      disjoint = earlier[:first_physical_page_index] > later[:last_physical_page_index] ||
                 later[:first_physical_page_index] > earlier[:last_physical_page_index]
      next if disjoint

      overlapping_pairs << [earlier[:serial_number], later[:serial_number]]
    end
  end
  interleaved_serials = overlapping_pairs.flatten

  frozen_streams = ordered.map do |entry|
    entry.merge(
      bytes: entry[:bytes].dup.freeze,
      interleaved_pages: interleaved_serials.include?(entry[:serial_number])
    ).freeze
  end

  physical_info = {
    page_count: total_pages,
    bos_page_count: total_bos,
    eos_page_count: total_eos,
    continued_page_count: total_continued,
    logical_stream_count: ordered.length,
    interleaved_multistream: !overlapping_pairs.empty?,
    overlapping_logical_stream_serial_pairs: overlapping_pairs
  }

  [frozen_streams, physical_info]
rescue Ogg::CorruptDataError, Ogg::SyncCorruptDataError => e
  raise InvalidFormatError, "OGG data corrupt or invalid checksum: #{e.message}"
rescue Ogg::StreamCorruptDataError => e
  raise InvalidFormatError, "OGG stream sequence error: #{e.message}"
ensure
  sync&.clear
end
|
|
303
|
+
|
|
304
|
+
# Reads OGG packets from a single logical stream IO using ogg-ruby.
|
|
305
|
+
#
|
|
306
|
+
# Returns [packets, ogg_info] where packets is an Array of Hashes with
|
|
307
|
+
# :data, :kind, :granule_position keys, and ogg_info is a Hash with
|
|
308
|
+
# page-level statistics.
|
|
309
|
+
def read_ogg_packets(io)
  io.rewind
  sync = Ogg::SyncState.new
  stream_state = nil
  serial_number = nil
  page_count = 0
  bos_page_count = 0
  eos_page_count = 0
  continued_page_count = 0
  max_granule_position = nil
  packets = []

  loop do
    chunk = io.read(4096)
    sync.write(chunk) if chunk

    while (page = sync.pageout)
      page_serial = page.serialno
      if serial_number && page_serial != serial_number
        raise UnsupportedFormatError,
              "multi-stream OGG containers must be split before packet reading"
      end
      raise InvalidFormatError, "first OGG page must have BOS flag" if page_count.zero? && !page.bos?

      # The first page fixes the serial number and creates the stream state.
      serial_number ||= page_serial
      stream_state ||= Ogg::StreamState.new(page_serial)
      stream_state.pagein(page)

      bos_page_count += 1 if page.bos?
      eos_page_count += 1 if page.eos?
      continued_page_count += 1 if page.continued?
      page_count += 1

      while (packet = stream_state.packetout)
        granule = packet.granulepos
        # A granule position of -1 is reported as "unknown" (nil).
        granule_known = granule != -1
        packets << {
          data: packet.data,
          bos: packet.bos?,
          eos: packet.eos?,
          packetno: packet.packetno,
          kind: classify_vorbis_packet(packet.data),
          granule_position: granule_known ? granule : nil
        }
        max_granule_position = [max_granule_position || 0, granule].max if granule_known
      end
    end

    break if chunk.nil?
  end

  page_info = {
    serial_number: serial_number,
    page_count: page_count,
    max_granule_position: max_granule_position,
    bos_page_count: bos_page_count,
    eos_page_count: eos_page_count,
    continued_page_count: continued_page_count
  }

  [packets, page_info]
rescue Ogg::CorruptDataError, Ogg::SyncCorruptDataError => e
  raise InvalidFormatError, "OGG data corrupt or invalid checksum: #{e.message}"
rescue Ogg::StreamCorruptDataError => e
  raise InvalidFormatError, "OGG stream sequence error: #{e.message}"
ensure
  stream_state&.clear
  sync&.clear
end
|
|
380
|
+
|
|
381
|
+
# ---------------------------------------------------------------------------
|
|
382
|
+
# Vorbis decode (using Vorbis::Native synthesis functions)
|
|
383
|
+
# ---------------------------------------------------------------------------
|
|
384
|
+
|
|
385
|
+
def build_vorbis_decode_context(io_or_path)
  # Reads every OGG packet from the input, feeds the three Vorbis header
  # packets to libvorbis and returns the state needed for synthesis.
  #
  # On success the initialized native info/comment structures are handed to
  # the caller inside the returned Hash (run_vorbis_decode_pipeline clears
  # them). On any failure after initialization they are cleared here, since
  # nothing else would ever release them.
  #
  # @param io_or_path [String, IO]
  # @return [Hash] :format, :channels, :sample_rate, :audio_packets,
  #   :sample_frame_count, :info_ptr, :comment_ptr
  # @raise [InvalidFormatError] on missing/invalid headers or no audio packets
  io, close_io = open_input(io_or_path)
  ensure_seekable!(io)

  packet_entries, ogg_info = read_ogg_packets(io)
  raise InvalidFormatError, "missing Vorbis identification header" if packet_entries[0].nil?
  raise InvalidFormatError, "missing Vorbis comment header" if packet_entries[1].nil?
  raise InvalidFormatError, "missing Vorbis setup header" if packet_entries[2].nil?

  info_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisInfo.size)
  comment_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisComment.size)
  info_initialized = false
  comment_initialized = false
  Vorbis::Native.vorbis_info_init(info_ptr)
  info_initialized = true
  Vorbis::Native.vorbis_comment_init(comment_ptr)
  comment_initialized = true

  packet_entries.first(3).each_with_index do |entry, idx|
    pkt = Ogg::Packet.new(
      data: entry.fetch(:data),
      bos: entry.fetch(:bos, idx.zero?),
      eos: entry.fetch(:eos, false),
      packetno: entry.fetch(:packetno, idx)
    )
    result = Vorbis::Native.vorbis_synthesis_headerin(info_ptr, comment_ptr, pkt.native)
    raise InvalidFormatError, "Vorbis header parse failed (code #{result})" unless result.zero?
  end

  vinfo = Vorbis::Native::VorbisInfo.new(info_ptr)
  channels = vinfo[:channels]
  sample_rate = vinfo[:rate]

  audio_packets = packet_entries.drop(3).select { |e| e.fetch(:kind) == :audio }
  raise InvalidFormatError, "OGG Vorbis stream does not contain audio packets" if audio_packets.empty?

  format = Core::Format.new(channels: channels, sample_rate: sample_rate, bit_depth: 32, sample_format: :float)

  {
    format: format,
    channels: channels,
    sample_rate: sample_rate,
    audio_packets: audio_packets,
    sample_frame_count: ogg_info[:max_granule_position],
    info_ptr: info_ptr,
    comment_ptr: comment_ptr
  }
rescue StandardError
  # Ownership was never handed off: release the native structures here.
  Vorbis::Native.vorbis_comment_clear(comment_ptr) if comment_initialized
  Vorbis::Native.vorbis_info_clear(info_ptr) if info_initialized
  raise
ensure
  io.close if close_io && io
end
|
|
431
|
+
|
|
432
|
+
def run_vorbis_decode_pipeline(decode_context, streaming: false, chunk_size: nil, &block)
  # Decodes every audio packet of a prepared decode context into one
  # interleaved float SampleBuffer.
  #
  # The whole stream is decoded before anything is yielded. With
  # +streaming+ and a block, the buffer is handed to
  # each_sample_buffer_frame_slice with +chunk_size+; with only a block it is
  # yielded whole; without a block it is returned.
  #
  # The info/comment pointers from the context are always cleared here, so a
  # context must not be reused after this call.
  #
  # @param decode_context [Hash] as built by build_vorbis_decode_context
  # @return [Core::SampleBuffer, nil] the buffer, or nil when a block was given
  # @raise [InvalidFormatError] when libvorbis synthesis/block init fails
  info_ptr = decode_context.fetch(:info_ptr)
  comment_ptr = decode_context.fetch(:comment_ptr)
  audio_packets = decode_context.fetch(:audio_packets)
  channels = decode_context.fetch(:channels)
  max_granule = decode_context.fetch(:sample_frame_count)
  format = decode_context.fetch(:format)

  dsp_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisDspState.size)
  block_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisBlock.size)
  pcm_pp = FFI::MemoryPointer.new(:pointer)
  dsp_initialized = false
  block_initialized = false

  result = Vorbis::Native.vorbis_synthesis_init(dsp_ptr, info_ptr)
  raise InvalidFormatError, "Vorbis synthesis init failed (#{result})" unless result.zero?

  dsp_initialized = true

  result = Vorbis::Native.vorbis_block_init(dsp_ptr, block_ptr)
  raise InvalidFormatError, "Vorbis block init failed (#{result})" unless result.zero?

  block_initialized = true

  all_samples = []
  ptr_size = FFI::Pointer.size

  audio_packets.each do |entry|
    pkt = Ogg::Packet.new(
      data: entry.fetch(:data),
      bos: entry.fetch(:bos, false),
      eos: entry.fetch(:eos, false),
      granulepos: entry[:granule_position].nil? ? -1 : entry[:granule_position],
      packetno: entry.fetch(:packetno, 0)
    )
    # Packets libvorbis rejects are skipped rather than aborting the decode.
    next unless Vorbis::Native.vorbis_synthesis(block_ptr, pkt.native).zero?

    Vorbis::Native.vorbis_synthesis_blockin(dsp_ptr, block_ptr)

    while (frame_count = Vorbis::Native.vorbis_synthesis_pcmout(dsp_ptr, pcm_pp)).positive?
      channel_array_ptr = pcm_pp.read_pointer
      # Resolve each channel's sample pointer once per PCM batch instead of
      # once per sample frame.
      channel_ptrs = Array.new(channels) { |ch| channel_array_ptr.get_pointer(ch * ptr_size) }
      frame_count.times do |frame|
        byte_offset = frame * 4 # samples are read as 4-byte floats
        channel_ptrs.each { |ch_ptr| all_samples << ch_ptr.get_float(byte_offset) }
      end
      Vorbis::Native.vorbis_synthesis_read(dsp_ptr, frame_count)
    end
  end

  # Trim anything decoded past the largest known granule position.
  if max_granule&.positive?
    target_sample_count = max_granule * channels
    all_samples = all_samples.first(target_sample_count) if all_samples.length > target_sample_count
  end

  result_buffer = Core::SampleBuffer.new(all_samples, format)

  if streaming && block
    each_sample_buffer_frame_slice(result_buffer, chunk_size, &block)
    nil
  elsif block
    yield result_buffer
    nil
  else
    result_buffer
  end
ensure
  Vorbis::Native.vorbis_block_clear(block_ptr) if block_initialized
  Vorbis::Native.vorbis_dsp_clear(dsp_ptr) if dsp_initialized
  Vorbis::Native.vorbis_comment_clear(comment_ptr) if comment_ptr
  Vorbis::Native.vorbis_info_clear(info_ptr) if info_ptr
end
|
|
505
|
+
|
|
506
|
+
# ---------------------------------------------------------------------------
|
|
507
|
+
# Metadata (using vorbis-ruby header parsing + ogg-ruby packet reading)
|
|
508
|
+
# ---------------------------------------------------------------------------
|
|
509
|
+
|
|
510
|
+
def parse_single_logical_stream_metadata(io)
  # Builds the metadata Hash for one logical OGG Vorbis stream without
  # decoding audio.
  #
  # Header parsing is lenient: it stops at the first header libvorbis rejects
  # and reports whatever was gathered so far (:vorbis_setup_parsed tells
  # whether the setup header was accepted).
  #
  # Vendor and comment strings come back from FFI as binary strings; they are
  # tagged as UTF-8 when their bytes are valid UTF-8 and left untouched
  # otherwise.
  #
  # @param io [IO] seekable IO holding a single logical stream
  # @return [Hash]
  # @raise [InvalidFormatError] when one of the three header packets is missing
  packet_entries, ogg_info = read_ogg_packets(io)
  raise InvalidFormatError, "missing Vorbis identification header" if packet_entries[0].nil?
  raise InvalidFormatError, "missing Vorbis comment header" if packet_entries[1].nil?
  raise InvalidFormatError, "missing Vorbis setup header" if packet_entries[2].nil?

  info_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisInfo.size)
  comment_ptr = FFI::MemoryPointer.new(Vorbis::Native::VorbisComment.size)
  Vorbis::Native.vorbis_info_init(info_ptr)
  Vorbis::Native.vorbis_comment_init(comment_ptr)

  # Re-tag raw bytes as UTF-8 only when that yields a valid string.
  as_utf8 = lambda do |bytes|
    text = bytes.dup.force_encoding(Encoding::UTF_8)
    text.valid_encoding? ? text : bytes
  end

  setup_parsed = false
  saved_channels = nil
  saved_rate = nil
  saved_bitrate_nominal = nil
  saved_bitrate_lower = nil
  saved_bitrate_upper = nil
  packet_entries.first(3).each_with_index do |entry, index|
    pkt = Ogg::Packet.new(
      data: entry.fetch(:data),
      bos: entry.fetch(:bos, index.zero?),
      eos: entry.fetch(:eos, false),
      packetno: entry.fetch(:packetno, index)
    )
    result = Vorbis::Native.vorbis_synthesis_headerin(info_ptr, comment_ptr, pkt.native)
    break unless result.zero?

    if index.zero?
      # Save info from identification header before setup header possibly clears VorbisInfo
      temp = Vorbis::Native::VorbisInfo.new(info_ptr)
      saved_channels = temp[:channels]
      saved_rate = temp[:rate]
      saved_bitrate_nominal = temp[:bitrate_nominal]
      saved_bitrate_lower = temp[:bitrate_lower]
      saved_bitrate_upper = temp[:bitrate_upper]
    end

    setup_parsed = (index == 2)
  end

  vinfo = Vorbis::Native::VorbisInfo.new(info_ptr)
  channels = saved_channels || vinfo[:channels]
  sample_rate = saved_rate || vinfo[:rate]
  nominal_bitrate_raw = saved_bitrate_nominal || vinfo[:bitrate_nominal]
  minimum_bitrate_raw = saved_bitrate_lower || vinfo[:bitrate_lower]
  maximum_bitrate_raw = saved_bitrate_upper || vinfo[:bitrate_upper]

  blocksize_small = nil
  blocksize_large = nil
  if setup_parsed
    bs = Vorbis::Native.vorbis_info_blocksize(info_ptr, 0)
    bl = Vorbis::Native.vorbis_info_blocksize(info_ptr, 1)
    blocksize_small = bs.positive? ? bs : nil
    blocksize_large = bl.positive? ? bl : nil
  end

  vc = Vorbis::Native::VorbisComment.new(comment_ptr)
  vendor = vc[:vendor].null? ? nil : as_utf8.call(vc[:vendor].read_string)
  comments_hash = {}
  n_comments = vc[:comments]
  if n_comments.positive? && !vc[:user_comments].null?
    user_comments_ptr = vc[:user_comments]
    comment_lengths_ptr = vc[:comment_lengths]
    n_comments.times do |i|
      str_ptr = user_comments_ptr.get_pointer(i * FFI::Pointer.size)
      next if str_ptr.null?

      len = comment_lengths_ptr.get_int32(i * 4)
      next unless len.positive?

      # Split and downcase on the raw bytes (ASCII-only case mapping, as
      # before), then tag key and value as text.
      key, value = str_ptr.read_bytes(len).split("=", 2)
      comments_hash[as_utf8.call(key.downcase)] = as_utf8.call(value) if key && value
    end
  end

  audio_packets, non_audio_packets = packet_entries.drop(3).partition { |e| e.fetch(:kind) == :audio }
  known_granule_positions = audio_packets.filter_map { |e| e[:granule_position] }

  format = Core::Format.new(channels: channels, sample_rate: sample_rate, bit_depth: 32, sample_format: :float)
  sample_frame_count = ogg_info[:max_granule_position]
  duration = sample_frame_count ? Core::Duration.from_samples(sample_frame_count, format.sample_rate) : nil

  {
    format: format,
    sample_frame_count: sample_frame_count,
    duration: duration,
    vendor: vendor,
    comments: comments_hash,
    nominal_bitrate: nominal_bitrate_raw.positive? ? nominal_bitrate_raw : nil,
    minimum_bitrate: minimum_bitrate_raw.positive? ? minimum_bitrate_raw : nil,
    maximum_bitrate: maximum_bitrate_raw.positive? ? maximum_bitrate_raw : nil,
    blocksize_small: blocksize_small,
    blocksize_large: blocksize_large,
    ogg_serial_number: ogg_info[:serial_number],
    ogg_page_count: ogg_info[:page_count],
    ogg_packet_count: packet_entries.length,
    ogg_bos_page_count: ogg_info[:bos_page_count],
    ogg_eos_page_count: ogg_info[:eos_page_count],
    ogg_continued_page_count: ogg_info[:continued_page_count],
    vorbis_audio_packet_count: audio_packets.length,
    vorbis_non_audio_packet_count: non_audio_packets.length,
    vorbis_audio_packets_with_granule_count: known_granule_positions.length,
    first_audio_packet_granule_position: known_granule_positions.first,
    last_audio_packet_granule_position: known_granule_positions.last,
    vorbis_setup_parsed: setup_parsed,
    # The keys below are always emitted with these fixed placeholder values.
    vorbis_codebook_count: nil,
    vorbis_codebook_dimensions: nil,
    vorbis_codebook_entries: nil,
    vorbis_codebook_lookup_types: nil,
    vorbis_codebook_used_entry_counts: nil,
    vorbis_codebook_sparse_count: nil,
    vorbis_codebook_huffman_complete_count: nil,
    vorbis_codebook_huffman_incomplete_count: nil,
    vorbis_codebook_huffman_max_codeword_length: nil,
    vorbis_floor_count: nil,
    vorbis_residue_count: nil,
    vorbis_floor_types: nil,
    vorbis_residue_types: nil,
    vorbis_mapping_count: nil,
    vorbis_mode_count: nil,
    vorbis_mode_bits: nil,
    vorbis_mode_blockflags: nil,
    vorbis_mode_mappings: nil,
    vorbis_mapping_submap_counts: nil,
    vorbis_mapping_coupling_step_counts: nil,
    vorbis_mapping_coupling_pairs: nil,
    vorbis_mapping_channel_muxes: nil,
    vorbis_mapping_submap_floors: nil,
    vorbis_mapping_submap_residues: nil,
    vorbis_mode_blocksizes: nil,
    vorbis_audio_packet_header_parsed_count: 0,
    vorbis_audio_packet_mode_histogram: {},
    vorbis_audio_packet_blocksize_histogram: {},
    vorbis_window_transition_histogram: {},
    vorbis_decode_plan_built: false,
    vorbis_decode_plan_packet_count: nil,
    vorbis_decode_plan_nominal_overlap_frame_total: nil,
    vorbis_decode_plan_known_granule_delta_count: nil,
    vorbis_decode_plan_nominal_minus_final_granule: nil,
    vorbis_output_assembly_preflight_ok: nil,
    vorbis_output_assembly_preflight_error: nil,
    vorbis_output_assembly_emitted_frame_count: nil,
    vorbis_output_assembly_trim_frames: nil,
    vorbis_output_assembly_window_curve_preflight_count: nil,
    vorbis_long_window_packet_count: nil,
    vorbis_short_window_packet_count: nil,
    setup_header_size: packet_entries[2]&.fetch(:data)&.bytesize
  }
ensure
  Vorbis::Native.vorbis_comment_clear(comment_ptr) if comment_ptr
  Vorbis::Native.vorbis_info_clear(info_ptr) if info_ptr
end
|
|
665
|
+
|
|
666
|
+
# ---------------------------------------------------------------------------
|
|
667
|
+
# Chained / interleaved stream merging (metadata)
|
|
668
|
+
# ---------------------------------------------------------------------------
|
|
669
|
+
|
|
670
|
+
# Folds the per-logical-stream metadata of a chained OGG Vorbis file into a
# single metadata Hash based on a copy of the first stream's metadata.
#
# Frame counts are rescaled to the first stream's sample rate and added up;
# the packet/page counters are summed only when every stream reports them.
#
# @param chain_metadatas [Array<Hash>] one metadata Hash per logical stream
# @param chained_streams [Array<Hash>] logical streams (each with :serial_number)
# @return [Hash] merged metadata
# @raise [InvalidFormatError] when no metadata is given or the counts differ
def merge_chained_vorbis_metadata(chain_metadatas, chained_streams)
  per_stream = Array(chain_metadatas)
  logical_streams = Array(chained_streams)
  raise InvalidFormatError, "OGG Vorbis chained metadata requires at least one logical stream" if per_stream.empty?
  unless per_stream.length == logical_streams.length
    raise InvalidFormatError, "OGG Vorbis chained metadata stream count mismatch"
  end

  merged = per_stream.first.dup
  output_format = merged.fetch(:format)
  formats = per_stream.map { |entry| entry.fetch(:format) }
  mixed_format_chain = formats.any? { |candidate| candidate != output_format }
  output_frame_counts = per_stream.map do |entry|
    resampled_vorbis_sample_frame_count(
      entry[:sample_frame_count].to_i,
      source_sample_rate: entry.fetch(:format).sample_rate,
      target_sample_rate: output_format.sample_rate
    )
  end
  total_frames = output_frame_counts.sum
  total_duration = Core::Duration.from_samples(total_frames, output_format.sample_rate)

  # Counters are only meaningful when every logical stream supplied a value.
  %i[
    ogg_page_count
    ogg_packet_count
    ogg_bos_page_count
    ogg_eos_page_count
    ogg_continued_page_count
    vorbis_audio_packet_count
    vorbis_non_audio_packet_count
    vorbis_audio_packets_with_granule_count
    vorbis_audio_packet_header_parsed_count
  ].each do |counter|
    tallies = per_stream.map { |entry| entry[counter] }
    merged[counter] = tallies.sum unless tallies.any?(&:nil?)
  end

  merged.merge!(
    format: output_format,
    sample_frame_count: total_frames,
    duration: total_duration,
    ogg_serial_number: logical_streams.first.fetch(:serial_number),
    ogg_serial_numbers: logical_streams.map { |logical| logical.fetch(:serial_number) },
    ogg_logical_stream_count: logical_streams.length,
    ogg_logical_stream_formats: formats,
    ogg_logical_stream_sample_frame_counts: per_stream.map { |entry| entry[:sample_frame_count] },
    ogg_logical_stream_output_frame_counts: output_frame_counts,
    ogg_logical_stream_durations: per_stream.map { |entry| entry[:duration] },
    vorbis_chained: true,
    vorbis_chained_mixed_format: mixed_format_chain,
    vorbis_chained_resampled_sample_rate: formats.any? do |candidate|
      candidate.sample_rate != output_format.sample_rate
    end
  )
  merged
end
|
|
727
|
+
|
|
728
|
+
# Folds the per-logical-stream metadata of an interleaved (overlapping)
# multi-stream OGG Vorbis file into a single metadata Hash based on a copy of
# the first stream's metadata.
#
# The merged frame count is the longest stream's frame count after rescaling
# to the first stream's sample rate; packet/page counters are summed only
# when every stream reports them.
#
# @param chain_metadatas [Array<Hash>] one metadata Hash per logical stream
# @param chained_streams [Array<Hash>] logical streams (each with :serial_number)
# @param physical_ogg_info [Hash] physical-stream info; :interleaved_multistream
#   and :overlapping_logical_stream_serial_pairs are copied into the result
# @return [Hash] merged metadata
# @raise [InvalidFormatError] when no metadata is given or the counts differ
def merge_interleaved_vorbis_metadata(chain_metadatas, chained_streams, physical_ogg_info)
  per_stream = Array(chain_metadatas)
  logical_streams = Array(chained_streams)
  raise InvalidFormatError, "OGG Vorbis interleaved metadata requires at least one logical stream" if per_stream.empty?
  unless per_stream.length == logical_streams.length
    raise InvalidFormatError, "OGG Vorbis interleaved metadata stream count mismatch"
  end

  merged = per_stream.first.dup
  output_format = merged.fetch(:format)
  formats = per_stream.map { |entry| entry.fetch(:format) }
  output_frame_counts = per_stream.map do |entry|
    resampled_vorbis_sample_frame_count(
      entry[:sample_frame_count].to_i,
      source_sample_rate: entry.fetch(:format).sample_rate,
      target_sample_rate: output_format.sample_rate
    )
  end
  # Streams play on top of each other, so the longest one sets the length.
  longest_frames = output_frame_counts.max || 0
  total_duration = Core::Duration.from_samples(longest_frames, output_format.sample_rate)

  %i[
    ogg_page_count
    ogg_packet_count
    ogg_bos_page_count
    ogg_eos_page_count
    ogg_continued_page_count
    vorbis_audio_packet_count
    vorbis_non_audio_packet_count
    vorbis_audio_packets_with_granule_count
    vorbis_audio_packet_header_parsed_count
  ].each do |counter|
    tallies = per_stream.map { |entry| entry[counter] }
    merged[counter] = tallies.sum unless tallies.any?(&:nil?)
  end

  merged.merge!(
    format: output_format,
    sample_frame_count: longest_frames,
    duration: total_duration,
    ogg_serial_number: logical_streams.first.fetch(:serial_number),
    ogg_serial_numbers: logical_streams.map { |logical| logical.fetch(:serial_number) },
    ogg_logical_stream_count: logical_streams.length,
    ogg_logical_stream_formats: formats,
    ogg_logical_stream_sample_frame_counts: per_stream.map { |entry| entry[:sample_frame_count] },
    ogg_logical_stream_output_frame_counts: output_frame_counts,
    ogg_logical_stream_durations: per_stream.map { |entry| entry[:duration] },
    vorbis_chained: false,
    vorbis_interleaved_multistream: true,
    vorbis_interleaved_multistream_mixed: true,
    vorbis_interleaved_multistream_resampled_sample_rate:
      formats.any? { |candidate| candidate.sample_rate != output_format.sample_rate },
    vorbis_chained_mixed_format: formats.any? { |candidate| candidate != output_format },
    ogg_interleaved_multistream: physical_ogg_info[:interleaved_multistream],
    ogg_overlapping_logical_stream_serial_pairs: physical_ogg_info[:overlapping_logical_stream_serial_pairs]
  )
  merged
end
|
|
787
|
+
|
|
788
|
+
# ---------------------------------------------------------------------------
|
|
789
|
+
# Chained / interleaved stream decoding (high-level helpers)
|
|
790
|
+
# ---------------------------------------------------------------------------
|
|
791
|
+
|
|
792
|
+
# Decodes an input that holds more than one logical OGG stream.
#
# Interleaved streams are decoded individually and mixed together; chained
# streams are decoded (with +target_format+ passed to +read+) and joined
# end to end.
#
# @param io_or_path [IO, String] input handed to the logical-stream reader
# @param decode_mode [Object] forwarded unchanged to +read+
# @param target_format [Core::Format, nil] requested output format
# @return [Core::SampleBuffer, nil] nil when the input has at most one
#   logical stream
def decode_chained_vorbis_read_if_needed(io_or_path, decode_mode:, target_format: nil)
  logical_streams, ogg_info = read_ogg_logical_stream_chains_from_input(io_or_path, with_info: true)
  return nil if logical_streams.length <= 1

  if ogg_info[:interleaved_multistream]
    overlapping = logical_streams.map do |logical|
      read(StringIO.new(logical.fetch(:bytes)), decode_mode: decode_mode)
    end
    mix_vorbis_sample_buffers(overlapping, target_format: target_format)
  else
    sequential = logical_streams.map do |logical|
      read(StringIO.new(logical.fetch(:bytes)), format: target_format, decode_mode: decode_mode)
    end
    concatenate_vorbis_sample_buffers(sequential, target_format: target_format)
  end
end
|
|
810
|
+
|
|
811
|
+
# Streams an input that holds more than one logical OGG stream, yielding
# sample-buffer chunks to the given block.
#
# Interleaved inputs are delegated to the mixed streaming helper. Chained
# inputs whose sample rates differ are fully decoded per stream, normalized
# to the first stream's format and sliced into chunks. Chained inputs that
# share one sample rate are streamed chunk by chunk and converted to the
# format of the first chunk seen.
#
# @param io_or_path [IO, String] input handed to the logical-stream reader
# @param chunk_size [Integer] requested frames per yielded chunk
# @param decode_mode [Object] forwarded unchanged to the decoders
# @yieldparam chunk [Core::SampleBuffer]
# @return [Boolean] false when the input has at most one logical stream;
#   otherwise the delegated helper's result or true
def stream_chained_vorbis_if_needed(io_or_path, chunk_size:, decode_mode:, &block)
  logical_streams, ogg_info = read_ogg_logical_stream_chains_from_input(io_or_path, with_info: true)
  return false if logical_streams.length <= 1

  # Both the interleaved and the chained path start from the same metadata.
  metadata_list = logical_streams.map do |logical|
    parse_single_logical_stream_metadata(StringIO.new(logical.fetch(:bytes)))
  end
  output_format = metadata_list.first.fetch(:format)

  if ogg_info[:interleaved_multistream]
    return stream_interleaved_vorbis_logical_streams_mixed!(
      logical_streams,
      chunk_size: chunk_size,
      decode_mode: decode_mode,
      target_format: output_format,
      stream_metadatas: metadata_list, &block
    )
  end

  rates_differ = metadata_list.any? do |metadata|
    metadata.fetch(:format).sample_rate != output_format.sample_rate
  end

  if rates_differ
    logical_streams.each do |logical|
      decoded = read(StringIO.new(logical.fetch(:bytes)), decode_mode: decode_mode)
      aligned = normalize_vorbis_logical_stream_buffer_for_target(decoded, output_format)
      each_sample_buffer_frame_slice(aligned, chunk_size, &block)
    end
    return true
  end

  pinned_format = nil
  logical_streams.each do |logical|
    stream_read(StringIO.new(logical.fetch(:bytes)), chunk_size: chunk_size, decode_mode: decode_mode) do |piece|
      pinned_format ||= piece.format
      yield(piece.format == pinned_format ? piece : piece.convert(pinned_format))
    end
  end

  true
end
|
|
856
|
+
|
|
857
|
+
# Joins decoded logical-stream buffers end to end.
#
# The first buffer is converted to the resolved target format when needed;
# every following buffer is normalized to that format and appended with
# SampleBuffer#concat.
#
# @param buffers [Array<Core::SampleBuffer>] decoded streams in play order
# @param target_format [Core::Format, nil] defaults to the first buffer's format
# @return [Core::SampleBuffer]
# @raise [InvalidFormatError] when the list is empty or holds a non-buffer
# @raise [InvalidParameterError] when target_format is not a Core::Format
def concatenate_vorbis_sample_buffers(buffers, target_format: nil)
  decoded = Array(buffers)
  raise InvalidFormatError, "OGG Vorbis chained decode did not produce any logical streams" if decoded.empty?

  head = decoded.first
  raise InvalidFormatError, "OGG Vorbis chained decode expected SampleBuffer outputs" unless head.is_a?(Core::SampleBuffer)
  unless target_format.nil? || target_format.is_a?(Core::Format)
    raise InvalidParameterError, "target_format must be Core::Format"
  end

  output_format = target_format || head.format
  joined = head.format == output_format ? head : head.convert(output_format)

  decoded.drop(1).each do |following|
    unless following.is_a?(Core::SampleBuffer)
      raise InvalidFormatError, "OGG Vorbis chained decode expected SampleBuffer outputs"
    end

    joined = joined.concat(normalize_vorbis_logical_stream_buffer_for_target(following, output_format))
  end
  joined
end
|
|
875
|
+
|
|
876
|
+
# Sums several sample buffers into one, sample position by sample position.
#
# All inputs are normalized to a 32-bit float version of the target format,
# added together, clamped to -1.0..1.0 and finally converted back to the
# target format when that differs from the float work format.
#
# @param buffers [Array<Core::SampleBuffer>] buffers to mix
# @param target_format [Core::Format, nil] defaults to the first buffer's format
# @return [Core::SampleBuffer]
# @raise [InvalidFormatError] when the list is empty or its first entry is
#   not a sample buffer
# @raise [InvalidParameterError] when target_format is not a Core::Format
def mix_vorbis_sample_buffers(buffers, target_format: nil)
  inputs = Array(buffers)
  raise InvalidFormatError, "OGG Vorbis multi-stream decode did not produce any logical streams" if inputs.empty?

  lead = inputs.first
  unless lead.is_a?(Core::SampleBuffer)
    raise InvalidFormatError, "OGG Vorbis multi-stream decode expected SampleBuffer outputs"
  end
  unless target_format.nil? || target_format.is_a?(Core::Format)
    raise InvalidParameterError, "target_format must be Core::Format"
  end

  output_format = target_format || lead.format
  float_format = output_format.with(sample_format: :float, bit_depth: 32)
  aligned = inputs.map { |input| normalize_vorbis_logical_stream_buffer_for_target(input, float_format) }

  # Shorter inputs simply stop contributing once they run out of samples.
  longest_frames = aligned.map(&:sample_frame_count).max || 0
  accumulator = Array.new(longest_frames * float_format.channels, 0.0)
  aligned.each do |input|
    input.samples.each_with_index { |value, position| accumulator[position] += value.to_f }
  end
  accumulator.map! { |value| [[value, -1.0].max, 1.0].min }

  result = Core::SampleBuffer.new(accumulator, float_format)
  result.format == output_format ? result : result.convert(output_format)
end
|
|
905
|
+
|
|
906
|
+
# Brings a decoded buffer into +target_format+: resamples first when the
# sample rates differ, then converts when the formats still differ.
#
# @param buffer [Core::SampleBuffer]
# @param target_format [Core::Format]
# @return [Core::SampleBuffer] the input itself when it already matches
# @raise [InvalidParameterError] when either argument has the wrong type
def normalize_vorbis_logical_stream_buffer_for_target(buffer, target_format)
  raise InvalidParameterError, "buffer must be Core::SampleBuffer" unless buffer.is_a?(Core::SampleBuffer)
  raise InvalidParameterError, "target_format must be Core::Format" unless target_format.is_a?(Core::Format)

  rate_matched = buffer
  unless buffer.format.sample_rate == target_format.sample_rate
    rate_matched = resample_vorbis_sample_buffer(buffer, target_sample_rate: target_format.sample_rate)
  end
  return rate_matched if rate_matched.format == target_format

  rate_matched.convert(target_format)
end
|
|
917
|
+
|
|
918
|
+
# Computes how many frames a stream of +frame_count+ frames occupies after
# changing its sample rate, rounded to the nearest frame.
#
# @param frame_count [Integer, #to_int, String] non-negative frame count
# @param source_sample_rate [Integer] positive source rate
# @param target_sample_rate [Integer] positive target rate
# @return [Integer]
# @raise [InvalidParameterError] on a negative count or non-positive rate
def resampled_vorbis_sample_frame_count(frame_count, source_sample_rate:, target_sample_rate:)
  frames = Integer(frame_count)
  from_rate = Integer(source_sample_rate)
  to_rate = Integer(target_sample_rate)
  raise InvalidParameterError, "frame_count must be non-negative" if frames.negative?
  raise InvalidParameterError, "source_sample_rate must be positive" unless from_rate.positive?
  raise InvalidParameterError, "target_sample_rate must be positive" unless to_rate.positive?

  # Nothing to rescale for identical rates or an empty stream.
  return frames if from_rate == to_rate || frames.zero?

  scaled = (frames * to_rate.to_f) / from_rate
  scaled.round
end
|
|
931
|
+
|
|
932
|
+
# Resamples a whole buffer to +target_sample_rate+ using linear
# interpolation between neighbouring source frames.
#
# The work is done in a 32-bit float copy of the source format and the
# result keeps that float format with the new sample rate. Positions at or
# past the last source frame repeat the last source sample.
#
# @param buffer [Core::SampleBuffer]
# @param target_sample_rate [Integer]
# @return [Core::SampleBuffer] the input itself when the rate already matches
# @raise [InvalidParameterError] when buffer is not a sample buffer
def resample_vorbis_sample_buffer(buffer, target_sample_rate:)
  raise InvalidParameterError, "buffer must be Core::SampleBuffer" unless buffer.is_a?(Core::SampleBuffer)

  input_format = buffer.format
  input_rate = input_format.sample_rate
  output_rate = Integer(target_sample_rate)
  return buffer if input_rate == output_rate

  float_format = input_format.with(sample_format: :float, bit_depth: 32)
  float_buffer = buffer.format == float_format ? buffer : buffer.convert(float_format)
  channel_count = float_format.channels
  output_frames = resampled_vorbis_sample_frame_count(
    float_buffer.sample_frame_count,
    source_sample_rate: input_rate,
    target_sample_rate: output_rate
  )
  return Core::SampleBuffer.new([], float_format.with(sample_rate: output_rate)) if output_frames.zero?

  # De-interleave into one float lane per channel.
  lanes = Array.new(channel_count) { [] }
  float_buffer.samples.each_slice(channel_count) do |frame|
    lanes.each_with_index { |lane, channel| lane << frame.fetch(channel).to_f }
  end

  # Fractional source position of every output frame (shared by all lanes).
  positions = Array.new(output_frames) { |index| (index * input_rate.to_f) / output_rate }

  resampled_lanes = lanes.map do |lane|
    case lane.length
    when 0
      Array.new(output_frames, 0.0)
    when 1
      Array.new(output_frames, lane.first.to_f)
    else
      last_index = lane.length - 1
      positions.map do |position|
        base = [position.floor, 0].max
        next lane.last.to_f if base >= last_index

        lower = lane.fetch(base).to_f
        upper = lane.fetch(base + 1).to_f
        lower + ((upper - lower) * (position - base))
      end
    end
  end

  # Every lane holds exactly output_frames values, so transposing yields
  # frames that flatten back into interleaved order.
  interleaved = resampled_lanes.transpose.flatten(1)

  Core::SampleBuffer.new(interleaved, float_format.with(sample_rate: output_rate))
end
|
|
988
|
+
|
|
989
|
+
# Streams several overlapping logical Vorbis streams as one mixed stream.
#
# Each logical stream is decoded chunk by chunk; the chunks available in a
# round are mixed and yielded. When +stream_metadatas+ shows that the sample
# rates differ, the work is delegated to the resampling variant.
#
# The format of the first emitted chunk is pinned for the rest of the
# stream. Without pinning, each mix defaulted to the format of the first
# stream that was still active, so the emitted format could change once an
# earlier stream with a different layout ran out of data.
#
# @param chained_streams [Array<Hash>] logical streams, each with :bytes
# @param chunk_size [Integer] frames per decoded chunk, forwarded to stream_read
# @param decode_mode [Object] forwarded unchanged to stream_read
# @param target_format [Core::Format, nil] reference format for the sample
#   rate check; defaults to the first metadata's :format
# @param stream_metadatas [Array<Hash>, nil] metadata per logical stream
# @yieldparam chunk [Core::SampleBuffer] mixed chunk
# @return [true, Enumerator] an Enumerator when no block is given
# @raise [InvalidFormatError] when no streams are given or the metadata count
#   does not match the stream count
def stream_interleaved_vorbis_logical_streams_mixed!(
  chained_streams,
  chunk_size:,
  decode_mode:,
  target_format: nil,
  stream_metadatas: nil, &block
)
  unless block_given?
    return enum_for(
      __method__,
      chained_streams,
      chunk_size: chunk_size,
      decode_mode: decode_mode,
      target_format: target_format,
      stream_metadatas: stream_metadatas
    )
  end

  streams = Array(chained_streams)
  raise InvalidFormatError, "OGG Vorbis interleaved stream decode requires logical streams" if streams.empty?

  metadatas = stream_metadatas ? Array(stream_metadatas) : nil
  if metadatas && metadatas.length != streams.length
    raise InvalidFormatError, "OGG Vorbis interleaved stream metadata count mismatch"
  end

  if metadatas
    resolved_target_format = target_format || metadatas.first.fetch(:format)
    same_sample_rate = metadatas.all? do |metadata|
      metadata.fetch(:format).sample_rate == resolved_target_format.sample_rate
    end
    unless same_sample_rate
      return stream_interleaved_vorbis_logical_streams_mixed_resampled!(
        streams,
        stream_metadatas: metadatas,
        target_format: resolved_target_format,
        chunk_size: chunk_size,
        decode_mode: decode_mode, &block
      )
    end
  end

  enumerators = streams.map do |stream|
    stream_read(
      StringIO.new(stream.fetch(:bytes)),
      chunk_size: chunk_size,
      decode_mode: decode_mode
    )
  end

  # nil on the first round lets the mixer pick the first active chunk's
  # format; every later round reuses that choice.
  output_format = nil
  loop do
    chunks = enumerators.map do |enumerator|
      enumerator.next
    rescue StopIteration
      # This logical stream is exhausted; it no longer contributes.
      nil
    end
    active_chunks = chunks.compact
    break if active_chunks.empty?

    mixed = mix_vorbis_sample_buffers(active_chunks, target_format: output_format)
    output_format ||= mixed.format
    yield mixed
  end

  true
end
|
|
1052
|
+
|
|
1053
|
+
# Streams overlapping logical Vorbis streams with differing sample rates as
# one mixed stream.
#
# Every logical stream gets its own state Hash (decoder enumerator, pending
# output samples, lazily built resampler). Each round tops up the pending
# samples of all streams, then emits +chunk_size+ frames, or the remaining
# frames once every source is drained. Mixed chunks are yielded in a 32-bit
# float version of +target_format+.
#
# @param chained_streams [Array<Hash>] logical streams, each with :bytes
# @param stream_metadatas [Array<Hash>] one metadata Hash per logical stream
# @param target_format [Core::Format] output format before float conversion
# @param chunk_size [Integer] frames per emitted chunk
# @param decode_mode [Object] forwarded unchanged to stream_read
# @yieldparam chunk [Core::SampleBuffer] mixed chunk
# @return [true, Enumerator] an Enumerator when no block is given
# @raise [InvalidFormatError] on count mismatch, missing streams, or when a
#   round neither produces output nor makes progress
def stream_interleaved_vorbis_logical_streams_mixed_resampled!(
  chained_streams,
  stream_metadatas:,
  target_format:,
  chunk_size:,
  decode_mode:
)
  unless block_given?
    return enum_for(
      __method__,
      chained_streams,
      stream_metadatas: stream_metadatas,
      target_format: target_format,
      chunk_size: chunk_size,
      decode_mode: decode_mode
    )
  end

  logical_streams = Array(chained_streams)
  metadata_list = Array(stream_metadatas)
  unless logical_streams.length == metadata_list.length
    raise InvalidFormatError, "OGG Vorbis interleaved stream metadata count mismatch"
  end
  raise InvalidFormatError, "OGG Vorbis interleaved stream decode requires logical streams" if logical_streams.empty?

  float_target = target_format.with(sample_format: :float, bit_depth: 32)
  states = logical_streams.map do |logical|
    {
      enumerator: stream_read(
        StringIO.new(logical.fetch(:bytes)),
        chunk_size: chunk_size,
        decode_mode: decode_mode
      ),
      source_eof: false,
      pending_samples: [],
      target_work_format: float_target,
      resampler_initialized: false,
      resampler: nil
    }
  end

  loop do
    advanced = false
    states.each do |state|
      topped_up = ensure_vorbis_interleaved_stream_pending_frames!(state, min_frames: chunk_size)
      advanced = true if topped_up
    end

    backlog = states.map { |state| state.fetch(:pending_samples).length / float_target.channels }
    if backlog.any? { |frames| frames >= chunk_size }
      frames_to_emit = chunk_size
    elsif states.all? { |state| vorbis_interleaved_stream_state_source_drained?(state) }
      # Every source is finished: flush whatever is left, then stop.
      frames_to_emit = backlog.max || 0
      break if frames_to_emit.zero?
    else
      raise InvalidFormatError, "interleaved Vorbis streaming resampler made no progress" unless advanced

      next
    end

    contributions = states.map do |state|
      take_vorbis_interleaved_stream_pending_chunk!(state, frame_count: frames_to_emit)
    end
    yield mix_vorbis_sample_buffers(contributions.compact, target_format: float_target)
  end

  true
end
|
|
1124
|
+
|
|
1125
|
+
# Tops up one stream state's pending samples until at least +min_frames+
# frames are queued or the source cannot deliver more.
#
# Each pass first drains the stream's resampler (when one exists); if that
# yields nothing and the source is not at EOF, the next decoded chunk is
# pulled and appended. Reaching the end of the decoder marks the state as
# EOF and finishes the resampler so its tail can be drained.
#
# @param stream_state [Hash] per-stream state (mutated)
# @param min_frames [Integer] frames wanted in the pending queue
# @return [Boolean] true when anything was queued or EOF was newly reached
def ensure_vorbis_interleaved_stream_pending_frames!(stream_state, min_frames:)
  work_format = stream_state.fetch(:target_work_format)
  queue = stream_state.fetch(:pending_samples)
  queued_frames = -> { queue.length / work_format.channels }
  advanced = false

  until queued_frames.call >= min_frames
    resampler = stream_state[:resampler]
    if resampler
      output = drain_vorbis_streaming_linear_resampler_chunk!(
        resampler,
        max_frames: (min_frames - queued_frames.call)
      )
      if output
        output = output.convert(work_format) unless output.format == work_format
        queue.concat(output.samples)
        advanced = true
        next
      end
    end

    break if stream_state[:source_eof]

    begin
      decoded = stream_state.fetch(:enumerator).next
      append_vorbis_interleaved_stream_pending_output_chunk!(stream_state, decoded)
      advanced = true
    rescue StopIteration
      # Decoder exhausted: let the resampler know so it can emit its tail.
      stream_state[:source_eof] = true
      finish_vorbis_streaming_linear_resampler!(resampler) if resampler
      advanced = true
    end
  end

  advanced
end
|
|
1162
|
+
|
|
1163
|
+
# Routes one freshly decoded chunk into a stream state's pending samples.
#
# On the first chunk the state's resampler is built from the chunk's format
# (the builder returns nil when no rate change is needed). With a resampler
# the chunk is fed in and all currently available output is drained into the
# pending queue; without one the chunk is normalized to the work format and
# queued directly.
#
# @param stream_state [Hash] per-stream state (mutated)
# @param chunk [Core::SampleBuffer] decoded chunk
# @return [nil]
# @raise [InvalidParameterError] when chunk is not a sample buffer
def append_vorbis_interleaved_stream_pending_output_chunk!(stream_state, chunk)
  raise InvalidParameterError, "chunk must be Core::SampleBuffer" unless chunk.is_a?(Core::SampleBuffer)

  work_format = stream_state.fetch(:target_work_format)
  queue = stream_state.fetch(:pending_samples)

  unless stream_state[:resampler_initialized]
    stream_state[:resampler] = build_vorbis_streaming_linear_resampler_state(
      source_format: chunk.format,
      target_sample_rate: work_format.sample_rate
    )
    stream_state[:resampler_initialized] = true
  end

  resampler = stream_state[:resampler]
  if resampler
    feed_vorbis_streaming_linear_resampler_chunk!(resampler, chunk)
    loop do
      output = drain_vorbis_streaming_linear_resampler_chunk!(resampler, max_frames: nil)
      break unless output

      output = output.convert(work_format) unless output.format == work_format
      queue.concat(output.samples)
    end
  else
    ready = chunk
    unless chunk.format == work_format
      ready = normalize_vorbis_logical_stream_buffer_for_target(chunk, work_format)
    end
    queue.concat(ready.samples)
  end
  nil
end
|
|
1195
|
+
|
|
1196
|
+
# Removes up to +frame_count+ frames from the front of a stream state's
# pending samples and wraps them in a sample buffer.
#
# @param stream_state [Hash] per-stream state (its :pending_samples is mutated)
# @param frame_count [Integer] maximum number of frames to take
# @return [Core::SampleBuffer, nil] nil when no whole frame is available or
#   zero frames were requested
# @raise [InvalidParameterError] when frame_count is negative
def take_vorbis_interleaved_stream_pending_chunk!(stream_state, frame_count:)
  requested = Integer(frame_count)
  raise InvalidParameterError, "frame_count must be non-negative" if requested.negative?

  queue = stream_state.fetch(:pending_samples)
  work_format = stream_state.fetch(:target_work_format)
  per_frame = work_format.channels
  frames = [requested, queue.length / per_frame].min
  return nil if frames.zero?

  Core::SampleBuffer.new(queue.shift(frames * per_frame), work_format)
end
|
|
1210
|
+
|
|
1211
|
+
# Tells whether a stream state can produce no further resampled output:
# its source is at EOF and it either has no resampler or the resampler
# reports itself finished.
#
# @param stream_state [Hash] per-stream state
# @return [Boolean]
def vorbis_interleaved_stream_state_source_drained?(stream_state)
  if stream_state[:source_eof]
    attached_resampler = stream_state[:resampler]
    attached_resampler.nil? || vorbis_streaming_linear_resampler_finished?(attached_resampler)
  else
    false
  end
end
|
|
1217
|
+
|
|
1218
|
+
# Creates the state Hash used by the streaming linear resampler helpers.
#
# @param source_format [Core::Format] format of the incoming chunks
# @param target_sample_rate [Integer] desired output sample rate
# @return [Hash, nil] fresh resampler state, or nil when the source already
#   runs at the target sample rate
# @raise [InvalidParameterError] when source_format is not a Core::Format
def build_vorbis_streaming_linear_resampler_state(source_format:, target_sample_rate:)
  raise InvalidParameterError, "source_format must be Core::Format" unless source_format.is_a?(Core::Format)

  output_rate = Integer(target_sample_rate)
  return nil if source_format.sample_rate == output_rate

  float_source = source_format.with(sample_format: :float, bit_depth: 32)
  formats = {
    source_work_format: float_source,
    target_work_format: float_source.with(sample_rate: output_rate)
  }
  rates = {
    source_sample_rate: float_source.sample_rate,
    target_sample_rate: output_rate,
    channels: float_source.channels
  }
  # Bookkeeping: buffered source samples, read/write cursors and EOF markers.
  progress = {
    source_buffer_samples: [],
    source_buffer_start_frame: 0,
    total_source_frames: 0,
    next_target_frame_index: 0,
    source_eof: false,
    final_target_frame_count: nil
  }
  formats.merge(rates).merge(progress)
end
|
|
1239
|
+
|
|
1240
|
+
# Appends one decoded chunk to a streaming resampler's source buffer.
#
# The chunk is converted to the state's float source format when needed,
# its samples are stored as Floats and the running source frame total grows
# by the chunk's frame count.
#
# @param state [Hash] resampler state (mutated)
# @param chunk [Core::SampleBuffer] decoded chunk at the source sample rate
# @return [nil]
# @raise [InvalidParameterError] when an argument has the wrong type
# @raise [InvalidFormatError] when the chunk's sample rate differs from the
#   state's source sample rate
def feed_vorbis_streaming_linear_resampler_chunk!(state, chunk)
  raise InvalidParameterError, "state must be a resampler state Hash" unless state.is_a?(Hash)
  raise InvalidParameterError, "chunk must be Core::SampleBuffer" unless chunk.is_a?(Core::SampleBuffer)

  expected_format = state.fetch(:source_work_format)
  unless chunk.format.sample_rate == expected_format.sample_rate
    raise InvalidFormatError,
          "streaming resampler source sample rate mismatch " \
          "(expected #{expected_format.sample_rate}, got #{chunk.format.sample_rate})"
  end

  float_chunk = chunk
  float_chunk = chunk.convert(expected_format) unless chunk.format == expected_format
  source_buffer = state.fetch(:source_buffer_samples)
  float_chunk.samples.each { |value| source_buffer << value.to_f }
  state[:total_source_frames] += float_chunk.sample_frame_count
  nil
end
|
|
1256
|
+
|
|
1257
|
+
# Marks a streaming resampler's source as complete and records how many
# target frames the whole source maps to. Calling it again is a no-op.
#
# @param state [Hash] resampler state (mutated)
# @return [nil]
# @raise [InvalidParameterError] when state is not a Hash
def finish_vorbis_streaming_linear_resampler!(state)
  raise InvalidParameterError, "state must be a resampler state Hash" unless state.is_a?(Hash)

  unless state[:source_eof]
    state[:source_eof] = true
    state[:final_target_frame_count] = resampled_vorbis_sample_frame_count(
      state.fetch(:total_source_frames),
      source_sample_rate: state.fetch(:source_sample_rate),
      target_sample_rate: state.fetch(:target_sample_rate)
    )
  end
  nil
end
|
|
1269
|
+
|
|
1270
|
+
# Tells whether a streaming resampler has emitted all of its output: the
# source must be at EOF, the final target frame count must be known, and
# the next target frame index must have reached that count.
#
# @param state [Object] resampler state; anything but a Hash yields false
# @return [Boolean]
def vorbis_streaming_linear_resampler_finished?(state)
  return false unless state.is_a?(Hash) && state[:source_eof]

  final_count = state[:final_target_frame_count]
  return false if final_count.nil?

  state.fetch(:next_target_frame_index) >= final_count
end
|
|
1277
|
+
|
|
1278
|
+
# Produces as many linearly interpolated target frames as the buffered
# source allows, up to +max_frames+.
#
# Before EOF a frame is only emitted when both neighbouring source frames
# are buffered; after EOF positions at or past the last source frame repeat
# that frame, and output stops at the final target frame count. The state's
# next target frame index advances per emitted frame, and the source buffer
# is compacted afterwards.
#
# @param state [Hash] resampler state (mutated)
# @param max_frames [Integer, nil] output cap; nil means no cap
# @return [Core::SampleBuffer, nil] nil when no frame could be produced
# @raise [InvalidParameterError] when state is not a Hash or max_frames is
#   negative
def drain_vorbis_streaming_linear_resampler_chunk!(state, max_frames:)
  raise InvalidParameterError, "state must be a resampler state Hash" unless state.is_a?(Hash)

  channel_count = state.fetch(:channels)
  input_rate = state.fetch(:source_sample_rate)
  output_rate = state.fetch(:target_sample_rate)
  source_frames = state.fetch(:total_source_frames)
  if source_frames.zero? && (!state[:source_eof] || state.fetch(:final_target_frame_count).to_i.zero?)
    return nil
  end

  frame_cap = Float::INFINITY
  unless max_frames.nil?
    frame_cap = Integer(max_frames)
    raise InvalidParameterError, "max_frames must be non-negative" if frame_cap.negative?
    return nil if frame_cap.zero?
  end

  final_count = state[:final_target_frame_count]
  last_source_frame = source_frames - 1
  produced = []
  emitted = 0

  until emitted >= frame_cap
    target_index = state.fetch(:next_target_frame_index)
    break if final_count && target_index >= final_count
    break if source_frames.zero?

    position = (target_index * input_rate.to_f) / output_rate
    lower_frame = [position.floor, 0].max
    # Before EOF the upper neighbour may still arrive with the next chunk.
    break if !state[:source_eof] && (lower_frame + 1) >= source_frames

    if lower_frame >= last_source_frame
      lower_frame = last_source_frame
      upper_frame = lower_frame
      weight = 0.0
    else
      upper_frame = lower_frame + 1
      weight = position - lower_frame
    end

    channel_count.times do |channel|
      lower = vorbis_streaming_linear_resampler_source_sample(state, lower_frame, channel)
      if upper_frame == lower_frame
        produced << lower
      else
        upper = vorbis_streaming_linear_resampler_source_sample(state, upper_frame, channel)
        produced << (lower + ((upper - lower) * weight))
      end
    end

    state[:next_target_frame_index] = target_index + 1
    emitted += 1
  end

  compact_vorbis_streaming_linear_resampler_source_buffer!(state)
  return nil if produced.empty?

  Core::SampleBuffer.new(produced, state.fetch(:target_work_format))
end
|
|
1342
|
+
|
|
1343
|
+
# Reads a single sample out of the resampler's buffered source window.
#
# The buffer is addressed as interleaved frames: the frame's offset from
# state[:source_buffer_start_frame] is multiplied by the channel count and
# the channel index is added.
#
# @param state [Hash] reads :channels, :source_buffer_start_frame and :source_buffer_samples
# @param absolute_frame_index [Integer] frame index on the same axis as :source_buffer_start_frame
# @param channel_index [Integer] channel within the frame
# @return [Float] the stored sample converted with #to_f
# @raise [InvalidFormatError] when the frame precedes the buffered window
#   ("underflow") or no sample is stored at the computed position ("overflow")
def vorbis_streaming_linear_resampler_source_sample(state, absolute_frame_index, channel_index)
  channel_count = state.fetch(:channels)
  frame_offset = absolute_frame_index - state.fetch(:source_buffer_start_frame)
  if frame_offset.negative?
    raise InvalidFormatError, "streaming resampler source buffer underflow"
  end

  buffered = state.fetch(:source_buffer_samples)
  value = buffered[(frame_offset * channel_count) + channel_index]
  if value.nil?
    raise InvalidFormatError, "streaming resampler source buffer overflow"
  end

  value.to_f
end
|
|
1355
|
+
|
|
1356
|
+
# Drops source frames from the front of the resampler buffer that lie before
# the first frame still needed, and advances :source_buffer_start_frame by
# the number of frames removed.
#
# The first needed frame is :total_source_frames once the resampler reports
# itself finished; otherwise it is the floor of the source position of
# :next_target_frame_index (never below 0). That value is then capped at
# :total_source_frames when :source_eof is set, or at one frame less when it
# is not, so the newest frame stays buffered while more input may arrive.
#
# NOTE(review): with :source_eof set and the resampler not finished, a source
# position at or past :total_source_frames drops every buffered frame --
# confirm no later read still needs the last frame in that state.
#
# @param state [Hash] resampler state; :source_buffer_samples and
#   :source_buffer_start_frame are mutated in place
# @return [nil]
def compact_vorbis_streaming_linear_resampler_source_buffer!(state)
  frame_total = state.fetch(:total_source_frames)
  return nil if frame_total.zero?

  earliest_needed =
    if vorbis_streaming_linear_resampler_finished?(state)
      frame_total
    else
      source_rate = state.fetch(:source_sample_rate)
      target_rate = state.fetch(:target_sample_rate)
      position = (state.fetch(:next_target_frame_index) * source_rate.to_f) / target_rate
      whole_frame = position.floor
      whole_frame.negative? ? 0 : whole_frame
    end

  # Both caps are <= frame_total, so one `min` is enough.
  ceiling = state[:source_eof] ? frame_total : frame_total - 1
  keep_from = [earliest_needed, ceiling].min

  stale_frames = keep_from - state.fetch(:source_buffer_start_frame)
  if stale_frames.positive?
    channel_count = state.fetch(:channels)
    state.fetch(:source_buffer_samples).slice!(0, stale_frames * channel_count)
    state[:source_buffer_start_frame] += stale_frames
  end

  nil
end
|
|
1383
|
+
|
|
1384
|
+
# Yields consecutive slices of +buffer+, each at most +chunk_size+ frames
# long; the final slice holds whatever remains. Without a block an
# Enumerator over the same slices is returned (arguments are then validated
# only once enumeration starts).
#
# @param buffer [Core::SampleBuffer] buffer to split
# @param chunk_size [Integer] maximum frames per slice, must be positive
# @yieldparam slice the result of buffer.slice(offset, length)
# @return [nil, Enumerator]
# @raise [InvalidParameterError] when buffer is not a Core::SampleBuffer or
#   chunk_size is not a positive Integer
def each_sample_buffer_frame_slice(buffer, chunk_size)
  return enum_for(__method__, buffer, chunk_size) unless block_given?

  unless buffer.is_a?(Core::SampleBuffer)
    raise InvalidParameterError, "buffer must be Core::SampleBuffer"
  end
  unless chunk_size.is_a?(Integer) && chunk_size.positive?
    raise InvalidParameterError, "chunk_size must be a positive Integer"
  end

  frame_total = buffer.sample_frame_count
  (0...frame_total).step(chunk_size) do |offset|
    remaining = frame_total - offset
    yield buffer.slice(offset, remaining < chunk_size ? remaining : chunk_size)
  end

  nil
end
|
|
1400
|
+
|
|
1401
|
+
# ---------------------------------------------------------------------------
|
|
1402
|
+
# Packet classification
|
|
1403
|
+
# ---------------------------------------------------------------------------
|
|
1404
|
+
|
|
1405
|
+
# Classifies a Vorbis packet from its leading bytes.
#
# A packet whose first byte has bit 0 clear is audio. Otherwise the packet is
# a header only when it is at least 7 bytes long, its first byte equals one
# of the header type constants, and the six characters after that byte equal
# VORBIS_SIGNATURE.
#
# @param packet [String, nil] raw packet bytes
# @return [Symbol] :audio, :identification_header, :comment_header,
#   :setup_header, or :unknown (nil/empty packets and anything unrecognised)
def classify_vorbis_packet(packet)
  return :unknown if packet.nil? || packet.empty?

  type_byte = packet.getbyte(0)
  return :audio if type_byte.nobits?(0x01)
  return :unknown unless packet.bytesize >= 7

  header_kind =
    case type_byte
    when IDENTIFICATION_HEADER_TYPE then :identification_header
    when COMMENT_HEADER_TYPE then :comment_header
    when SETUP_HEADER_TYPE then :setup_header
    end
  return :unknown if header_kind.nil?

  packet[1, 6] == VORBIS_SIGNATURE ? header_kind : :unknown
end
|
|
1418
|
+
|
|
1419
|
+
# ---------------------------------------------------------------------------
|
|
1420
|
+
# IO helpers
|
|
1421
|
+
# ---------------------------------------------------------------------------
|
|
1422
|
+
|
|
1423
|
+
# Resolves the input argument to a readable handle.
#
# Anything responding to #read is passed through untouched; a String is
# treated as a path and opened in binary read mode.
#
# @param io_or_path [#read, String]
# @return [Array] two-element array [io, opened]; opened is true only
#   when this method opened the file itself
# @raise [InvalidParameterError] when the argument neither responds to #read
#   nor is a String
# @raise [InvalidFormatError] when the path does not exist (Errno::ENOENT)
def open_input(io_or_path)
  if io_or_path.respond_to?(:read)
    [io_or_path, false]
  elsif io_or_path.is_a?(String)
    [File.open(io_or_path, "rb"), true]
  else
    raise InvalidParameterError, "input path must be String or IO: #{io_or_path.inspect}"
  end
rescue Errno::ENOENT
  raise InvalidFormatError, "input file not found: #{io_or_path}"
end
|
|
1431
|
+
|
|
1432
|
+
# Resolves the output argument to a writable handle.
#
# Anything responding to #write is passed through untouched; a String is
# treated as a path and opened in binary write mode.
#
# @param io_or_path [#write, String]
# @return [Array] two-element array [io, opened]; opened is true only
#   when this method opened the file itself
# @raise [InvalidParameterError] when the argument neither responds to
#   #write nor is a String
def open_output(io_or_path)
  if io_or_path.respond_to?(:write)
    [io_or_path, false]
  elsif io_or_path.is_a?(String)
    [File.open(io_or_path, "wb"), true]
  else
    raise InvalidParameterError, "output path must be String or IO: #{io_or_path.inspect}"
  end
end
|
|
1438
|
+
|
|
1439
|
+
# Guards against IO objects that cannot be repositioned.
#
# @param io [Object] candidate IO; must respond to both #seek and #rewind
# @return [nil] when both methods are available
# @raise [StreamError] when either method is missing
def ensure_seekable!(io)
  seekable = %i[seek rewind].all? { |capability| io.respond_to?(capability) }
  raise StreamError, "OGG Vorbis codec requires seekable IO" unless seekable
end
|
|
1444
|
+
end
|
|
1445
|
+
end
|
|
1446
|
+
end
|
|
1447
|
+
end
|