format_parser 2.8.0 → 2.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/wav_parser.rb +19 -37
- data/spec/parsers/wav_parser_spec.rb +12 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 922909c34fc304a86a75f6396bd9627cdacf009aba5191da863064811ca2906f
|
4
|
+
data.tar.gz: 864b73aa70b7ec650dab326f677b17f7ed1905d05560deb669a564bb9fdc3d37
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5f2527d385f3270f9b11976d478030108787a165e4440825c4ffa8c6a56054e67e5a7fb1a88d18d25dcb945ab6c2befc97156e7a2a5d5ad98f2e11e7c558ce2a
|
7
|
+
data.tar.gz: 788254d0f2e40625a6f2fd17c3c7f8959456ad1018fceb62305c5a877a3a8fd52ad5587915b0d3711a8ab68c6dc0ca6d283d9556f68a3234bbd3ecb7a50ece95
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
|
+
## 2.10.0
|
2
|
+
* Improve WAV parser by prioritizing performance over a best-effort attempt to extract metadata from files that do not strictly follow the format spec.
|
3
|
+
|
4
|
+
## 2.9.0
|
5
|
+
* Improve WAV parser by making a best-effort attempt to extract metadata from files that do not strictly follow the format spec.
|
6
|
+
|
1
7
|
## 2.8.0
|
2
|
-
* Add support for Ruby 3.2 and 3.3
|
8
|
+
* Add support for Ruby 3.2 and 3.3.
|
3
9
|
|
4
10
|
## 2.7.2
|
5
11
|
* Improved stability for mp4 parser when dealing with corrupted FTYP boxes.
|
data/lib/parsers/wav_parser.rb
CHANGED
@@ -20,75 +20,57 @@ class FormatParser::WAVParser
|
|
20
20
|
# with the exception that the Format chunk must precede the Data chunk.
|
21
21
|
# The specification does not require the Format chunk to be the first chunk
|
22
22
|
# after the RIFF header.
|
23
|
-
#
|
24
|
-
# For WAVE files containing PCM audio format we parse the 'fmt ' and
|
25
|
-
# 'data' chunks while for non PCM audio formats the 'fmt ' and 'fact'
|
26
|
-
# chunks. In the latter case the order of appearance of the chunks is
|
27
|
-
# arbitrary.
|
23
|
+
# https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
|
28
24
|
fmt_processed = false
|
29
|
-
|
25
|
+
data_processed = false
|
30
26
|
fmt_data = {}
|
31
|
-
|
27
|
+
data_size = 0
|
32
28
|
loop do
|
33
29
|
chunk_type, chunk_size = safe_read(io, 8).unpack('a4l')
|
34
30
|
case chunk_type
|
35
31
|
when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space
|
36
32
|
fmt_data = unpack_fmt_chunk(io, chunk_size)
|
37
|
-
return process_non_pcm(fmt_data, total_sample_frames) if fmt_data[:audio_format] != 1 and fact_processed
|
38
33
|
fmt_processed = true
|
39
34
|
when 'data'
|
40
|
-
|
41
|
-
|
42
|
-
safe_skip(io, chunk_size)
|
43
|
-
when 'fact'
|
44
|
-
total_sample_frames = safe_read(io, 4).unpack('l').first
|
45
|
-
safe_skip(io, chunk_size - 4)
|
46
|
-
return process_non_pcm(fmt_data, total_sample_frames) if fmt_processed and fmt_data[:audio_format] != 1
|
47
|
-
fact_processed = true
|
35
|
+
data_size = chunk_size
|
36
|
+
data_processed = true
|
48
37
|
else
|
49
38
|
# Skip this chunk until a known chunk is encountered
|
50
39
|
safe_skip(io, chunk_size)
|
51
40
|
end
|
41
|
+
break if fmt_processed && data_processed
|
52
42
|
end
|
43
|
+
|
44
|
+
file_info(fmt_data, data_size)
|
53
45
|
end
|
54
46
|
|
55
47
|
def unpack_fmt_chunk(io, chunk_size)
|
56
48
|
# The size of the fmt chunk is at least 16 bytes. If the format tag's value is not
|
57
49
|
# 1 compression might be in use for storing the data
|
58
50
|
# and the fmt chunk might contain extra fields appended to it.
|
59
|
-
# The
|
51
|
+
# The first 6 fields of the fmt tag are always:
|
52
|
+
# * unsigned short audio format
|
60
53
|
# * unsigned short channels
|
61
54
|
# * unsigned long samples per sec
|
62
55
|
# * unsigned long average bytes per sec
|
63
56
|
# * unsigned short block align
|
64
57
|
# * unsigned short bits per sample
|
65
58
|
|
66
|
-
|
59
|
+
_, channels, sample_rate, byte_rate, _, bits_per_sample = safe_read(io, 16).unpack('S_2I2S_2')
|
67
60
|
safe_skip(io, chunk_size - 16) # skip the extra fields
|
68
61
|
|
69
62
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
block_align: fmt_info[4],
|
75
|
-
bits_per_sample: fmt_info[5],
|
63
|
+
channels: channels,
|
64
|
+
sample_rate: sample_rate,
|
65
|
+
byte_rate: byte_rate,
|
66
|
+
bits_per_sample: bits_per_sample,
|
76
67
|
}
|
77
68
|
end
|
78
69
|
|
79
|
-
def
|
80
|
-
|
81
|
-
sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8)
|
82
|
-
|
83
|
-
end
|
84
|
-
|
85
|
-
def process_non_pcm(fmt_data, total_sample_frames)
|
86
|
-
file_info(fmt_data, total_sample_frames)
|
87
|
-
end
|
88
|
-
|
89
|
-
def file_info(fmt_data, sample_frames)
|
90
|
-
return unless fmt_data[:sample_rate] > 0
|
91
|
-
duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f
|
70
|
+
def file_info(fmt_data, data_size)
|
71
|
+
# NOTE: Each sample includes information for each channel
|
72
|
+
sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8) if fmt_data[:channels] > 0 && fmt_data[:bits_per_sample] > 0
|
73
|
+
duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f if sample_frames && fmt_data[:byte_rate] > 0
|
92
74
|
FormatParser::Audio.new(
|
93
75
|
format: :wav,
|
94
76
|
num_audio_channels: fmt_data[:channels],
|
@@ -20,8 +20,9 @@ describe FormatParser::WAVParser do
|
|
20
20
|
expect(parse_result.format).to eq(:wav)
|
21
21
|
expect(parse_result.num_audio_channels).to eq(1)
|
22
22
|
expect(parse_result.audio_sample_rate_hz).to eq(8000)
|
23
|
-
|
24
|
-
expect(parse_result.
|
23
|
+
# Fixture does not define bits_per_sample in the fmt chunk
|
24
|
+
expect(parse_result.media_duration_frames).to be_nil
|
25
|
+
expect(parse_result.media_duration_seconds).to be_nil
|
25
26
|
end
|
26
27
|
|
27
28
|
it 'returns correct info about pcm files with more channels' do
|
@@ -46,9 +47,14 @@ describe FormatParser::WAVParser do
|
|
46
47
|
expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81)
|
47
48
|
end
|
48
49
|
|
49
|
-
it
|
50
|
-
|
51
|
-
|
52
|
-
|
50
|
+
it 'returns correct info about non pcm files with no fact chunk' do
|
51
|
+
parse_result = subject.call(File.open(__dir__ + '/../fixtures/WAV/d_6_Channel_ID.wav', 'rb'))
|
52
|
+
|
53
|
+
expect(parse_result.nature).to eq(:audio)
|
54
|
+
expect(parse_result.format).to eq(:wav)
|
55
|
+
expect(parse_result.num_audio_channels).to eq(6)
|
56
|
+
expect(parse_result.audio_sample_rate_hz).to eq(44100)
|
57
|
+
expect(parse_result.media_duration_frames).to eq(257411)
|
58
|
+
expect(parse_result.media_duration_seconds).to be_within(0.01).of(5.83)
|
53
59
|
end
|
54
60
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2024-07-
|
12
|
+
date: 2024-07-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: exifr
|