format_parser 2.7.2 → 2.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +4 -0
- data/CHANGELOG.md +15 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/wav_parser.rb +21 -36
- data/spec/parsers/wav_parser_spec.rb +9 -4
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04b14799c012037cf53b949b0920e0f8e2d5a42e30182363fee0d283d41eade4
|
4
|
+
data.tar.gz: 78ff4a354e05fb0e67a7b8dd8368768fceb313afc4dd736d5fa41592dfbc7b35
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5792c23191b2d4b4d63a5d9dfbfe5a3b28862f9e8ba977c49e39f3ebaf6bd222a31c296f224743a00fc877f8dbbfb6f97cdd19a0b6d712db6e3a00d29dd30d8
|
7
|
+
data.tar.gz: 6fd2ac968375090928b407afa3234c623c933150a36a00da965e75bb97d01ee2dcb502124e71fa100f0f2321b16a0f268c8a8fa94691cd195fbeb65a1fbab177
|
data/.github/workflows/main.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
## 2.9.0
|
2
|
+
* Improve WAV parser by making a best-effort attempt to extract metadata from files that do not strictly follow the format spec.
|
3
|
+
|
4
|
+
## 2.8.0
|
5
|
+
* Add support for Ruby 3.2 and 3.3.
|
6
|
+
|
7
|
+
## 2.7.2
|
8
|
+
* Improved stability for mp4 parser when dealing with corrupted FTYP boxes.
|
9
|
+
|
10
|
+
## 2.7.1
|
11
|
+
* Fixed bug with WAV file wrongly parsed as MP3.
|
12
|
+
|
13
|
+
## 2.7.0
|
14
|
+
* JSON format support.
|
15
|
+
|
1
16
|
## 2.6.0
|
2
17
|
* Prevent the default loading of thumbnails on TIFF-based formats to improve I/O.
|
3
18
|
|
data/lib/parsers/wav_parser.rb
CHANGED
@@ -20,75 +20,60 @@ class FormatParser::WAVParser
|
|
20
20
|
# with the exception that the Format chunk must precede the Data chunk.
|
21
21
|
# The specification does not require the Format chunk to be the first chunk
|
22
22
|
# after the RIFF header.
|
23
|
-
#
|
24
|
-
# For WAVE files containing PCM audio format we parse the 'fmt ' and
|
25
|
-
# 'data' chunks while for non PCM audio formats the 'fmt ' and 'fact'
|
26
|
-
# chunks. In the latter case the order of appearance of the chunks is
|
27
|
-
# arbitrary.
|
28
|
-
fmt_processed = false
|
29
|
-
fact_processed = false
|
23
|
+
# https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
|
30
24
|
fmt_data = {}
|
31
|
-
|
25
|
+
data_size = 0
|
26
|
+
total_sample_frames = nil
|
32
27
|
loop do
|
33
28
|
chunk_type, chunk_size = safe_read(io, 8).unpack('a4l')
|
34
29
|
case chunk_type
|
35
30
|
when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space
|
36
31
|
fmt_data = unpack_fmt_chunk(io, chunk_size)
|
37
|
-
return process_non_pcm(fmt_data, total_sample_frames) if fmt_data[:audio_format] != 1 and fact_processed
|
38
|
-
fmt_processed = true
|
39
32
|
when 'data'
|
40
|
-
|
41
|
-
return process_pcm(fmt_data, chunk_size) if fmt_data[:audio_format] == 1
|
42
|
-
safe_skip(io, chunk_size)
|
33
|
+
data_size = chunk_size
|
43
34
|
when 'fact'
|
44
35
|
total_sample_frames = safe_read(io, 4).unpack('l').first
|
45
36
|
safe_skip(io, chunk_size - 4)
|
46
|
-
return process_non_pcm(fmt_data, total_sample_frames) if fmt_processed and fmt_data[:audio_format] != 1
|
47
|
-
fact_processed = true
|
48
37
|
else
|
49
38
|
# Skip this chunk until a known chunk is encountered
|
50
39
|
safe_skip(io, chunk_size)
|
51
40
|
end
|
41
|
+
rescue FormatParser::IOUtils::InvalidRead
|
42
|
+
# We've reached EOF, so it's time to make the most out of the metadata we
|
43
|
+
# managed to parse
|
44
|
+
break
|
52
45
|
end
|
46
|
+
|
47
|
+
file_info(fmt_data, data_size, total_sample_frames)
|
53
48
|
end
|
54
49
|
|
55
50
|
def unpack_fmt_chunk(io, chunk_size)
|
56
51
|
# The size of the fmt chunk is at least 16 bytes. If the format tag's value is not
|
57
52
|
# 1 compression might be in use for storing the data
|
58
53
|
# and the fmt chunk might contain extra fields appended to it.
|
59
|
-
# The
|
54
|
+
# The first 6 fields of the fmt chunk are always:
|
55
|
+
# * unsigned short audio format
|
60
56
|
# * unsigned short channels
|
61
57
|
# * unsigned long samples per sec
|
62
58
|
# * unsigned long average bytes per sec
|
63
59
|
# * unsigned short block align
|
64
60
|
# * unsigned short bits per sample
|
65
61
|
|
66
|
-
|
62
|
+
_, channels, sample_rate, byte_rate, _, bits_per_sample = safe_read(io, 16).unpack('S_2I2S_2')
|
67
63
|
safe_skip(io, chunk_size - 16) # skip the extra fields
|
68
64
|
|
69
65
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
block_align: fmt_info[4],
|
75
|
-
bits_per_sample: fmt_info[5],
|
66
|
+
channels: channels,
|
67
|
+
sample_rate: sample_rate,
|
68
|
+
byte_rate: byte_rate,
|
69
|
+
bits_per_sample: bits_per_sample,
|
76
70
|
}
|
77
71
|
end
|
78
72
|
|
79
|
-
def
|
80
|
-
|
81
|
-
sample_frames
|
82
|
-
|
83
|
-
end
|
84
|
-
|
85
|
-
def process_non_pcm(fmt_data, total_sample_frames)
|
86
|
-
file_info(fmt_data, total_sample_frames)
|
87
|
-
end
|
88
|
-
|
89
|
-
def file_info(fmt_data, sample_frames)
|
90
|
-
return unless fmt_data[:sample_rate] > 0
|
91
|
-
duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f
|
73
|
+
def file_info(fmt_data, data_size, sample_frames)
|
74
|
+
# NOTE: Each sample includes information for each channel
|
75
|
+
sample_frames ||= data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8) if fmt_data[:channels] > 0 && fmt_data[:bits_per_sample] > 0
|
76
|
+
duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f if fmt_data[:sample_rate] > 0
|
92
77
|
FormatParser::Audio.new(
|
93
78
|
format: :wav,
|
94
79
|
num_audio_channels: fmt_data[:channels],
|
@@ -46,9 +46,14 @@ describe FormatParser::WAVParser do
|
|
46
46
|
expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81)
|
47
47
|
end
|
48
48
|
|
49
|
-
it
|
50
|
-
|
51
|
-
|
52
|
-
|
49
|
+
it 'returns correct info about non pcm files with no fact chunk' do
|
50
|
+
parse_result = subject.call(File.open(__dir__ + '/../fixtures/WAV/d_6_Channel_ID.wav', 'rb'))
|
51
|
+
|
52
|
+
expect(parse_result.nature).to eq(:audio)
|
53
|
+
expect(parse_result.format).to eq(:wav)
|
54
|
+
expect(parse_result.num_audio_channels).to eq(6)
|
55
|
+
expect(parse_result.audio_sample_rate_hz).to eq(44100)
|
56
|
+
expect(parse_result.media_duration_frames).to eq(257411)
|
57
|
+
expect(parse_result.media_duration_seconds).to be_within(0.01).of(5.83)
|
53
58
|
end
|
54
59
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.7.2
|
4
|
+
version: 2.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2024-07-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: exifr
|
@@ -334,7 +334,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
334
334
|
- !ruby/object:Gem::Version
|
335
335
|
version: '0'
|
336
336
|
requirements: []
|
337
|
-
rubygems_version: 3.3.
|
337
|
+
rubygems_version: 3.3.26
|
338
338
|
signing_key:
|
339
339
|
specification_version: 4
|
340
340
|
summary: A library for efficient parsing of file metadata
|