format_parser 0.24.0 → 0.25.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/format_parser.gemspec +2 -2
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/moov_parser.rb +26 -7
- data/lib/parsers/moov_parser/decoder.rb +31 -0
- data/lib/parsers/mp3_parser.rb +19 -13
- data/lib/parsers/mp3_parser/id3_extraction.rb +4 -2
- data/spec/parsers/moov_parser_spec.rb +20 -0
- data/spec/parsers/mp3_parser_spec.rb +37 -1
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 290b2369d2fe089202a76ab9d7b659f8de58fa44b9a40718130105ae7026036a
|
4
|
+
data.tar.gz: 53e983a8639cc42377ab50d7364b5099cf1f3308f8108f92b6194040546ea2e8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5f58620ab165b1c47a8a18fe82081d48cb0b285b4f9638146c31d1bb2f839247b36750a7f85be5a9ea14b8db3fc2ca175b86ab4f8b29f87bd1ea10caed57746c
|
7
|
+
data.tar.gz: b9e87723a7cc1d5ecf04a23c28cbfc433c120337275aa51d76396a9a1371bb1b683b22c18ab4e84d8870a76048e26f7c5d7f90b4b5f9b868bf5a0dcb6771640c
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
## 0.25.1
|
2
|
+
* MOV: Fix error "negative length"
|
3
|
+
* MOV: Fix reading dimensions in multi-track files
|
4
|
+
* MP3: Fix parse of the Xing header to not raise errors
|
5
|
+
|
6
|
+
## 0.25.0
|
7
|
+
* MP3: add support for id3 v2.4.x
|
8
|
+
* JPEG: Update gem exifr to 1.3.8 to fix a bug
|
9
|
+
|
10
|
+
## 0.24.2
|
11
|
+
* Update gem id3tag to 0.14.0 to fix MP3 issues
|
12
|
+
|
13
|
+
## 0.24.1
|
14
|
+
* Fix MP3 frames reading to jump correctly to the next bytes
|
15
|
+
|
1
16
|
## 0.24.0
|
2
17
|
* The TIFF parser will now return :arw as format for Sony ARW files instead of :tif so that the caller can decide whether it
|
3
18
|
wants to deal with RAW processing or not
|
data/format_parser.gemspec
CHANGED
@@ -31,8 +31,8 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
33
|
spec.add_dependency 'ks', '~> 0.0'
|
34
|
-
spec.add_dependency 'exifr', '~> 1', '>= 1.3.
|
35
|
-
spec.add_dependency 'id3tag', '~> 0.
|
34
|
+
spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
|
35
|
+
spec.add_dependency 'id3tag', '~> 0.14'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
37
|
spec.add_dependency 'measurometer', '~> 1'
|
38
38
|
|
data/lib/parsers/moov_parser.rb
CHANGED
@@ -38,14 +38,8 @@ class FormatParser::MOOVParser
|
|
38
38
|
ftyp_atom = decoder.find_first_atom_by_path(atom_tree, 'ftyp')
|
39
39
|
file_type = ftyp_atom.field_value(:major_brand)
|
40
40
|
|
41
|
-
width = nil
|
42
|
-
height = nil
|
43
|
-
|
44
41
|
# Try to find the width and height in the tkhd
|
45
|
-
|
46
|
-
width = tkhd.field_value(:track_width).first
|
47
|
-
height = tkhd.field_value(:track_height).first
|
48
|
-
end
|
42
|
+
width, height = parse_dimensions(decoder, atom_tree)
|
49
43
|
|
50
44
|
# Try to find the "topmost" duration (respecting edits)
|
51
45
|
if mdhd = decoder.find_first_atom_by_path(atom_tree, 'moov', 'mvhd')
|
@@ -78,6 +72,31 @@ class FormatParser::MOOVParser
|
|
78
72
|
FTYP_MAP.fetch(file_type.downcase, :mov)
|
79
73
|
end
|
80
74
|
|
75
|
+
# The dimensions are located in tkhd atom, but in some files it is necessary
|
76
|
+
# to get it below the video track, because it can have other tracks such as
|
77
|
+
# audio which does not have the dimensions.
|
78
|
+
# More details in https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-DontLinkElementID_147
|
79
|
+
#
|
80
|
+
# Returns [width, height] if the dimension is found
|
81
|
+
# Returns [nil, nil] if the dimension is not found
|
82
|
+
def parse_dimensions(decoder, atom_tree)
|
83
|
+
video_trak_atom = decoder.find_video_trak_atom(atom_tree)
|
84
|
+
|
85
|
+
tkhd = begin
|
86
|
+
if video_trak_atom
|
87
|
+
decoder.find_first_atom_by_path([video_trak_atom], 'trak', 'tkhd')
|
88
|
+
else
|
89
|
+
decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
if tkhd
|
94
|
+
[tkhd.field_value(:track_width).first, tkhd.field_value(:track_height).first]
|
95
|
+
else
|
96
|
+
[nil, nil]
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
81
100
|
# An MPEG4/MOV/M4A will start with the "ftyp" atom. The atom must have a length
|
82
101
|
# of at least 8 (to accommodate the atom size and the atom type itself) plus the major
|
83
102
|
# and minor version fields. If we cannot find it we can be certain this is not our file.
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# Handles decoding of MOV/MPEG4 atoms/boxes in a stream. Will recursively
|
2
2
|
# read atoms and parse their data fields if applicable. Also contains
|
3
3
|
# a few utility functions for finding atoms in a list etc.
|
4
|
+
# To know more about Atoms: https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html
|
4
5
|
class FormatParser::MOOVParser::Decoder
|
5
6
|
include FormatParser::IOUtils
|
6
7
|
|
@@ -47,6 +48,34 @@ class FormatParser::MOOVParser::Decoder
|
|
47
48
|
find_first_atom_by_path(requisite.children || [], *atom_types)
|
48
49
|
end
|
49
50
|
|
51
|
+
def find_atoms_by_path(atoms, atom_types)
|
52
|
+
type_to_find = atom_types.shift
|
53
|
+
requisites = atoms.select { |e| e.atom_type == type_to_find }
|
54
|
+
|
55
|
+
# Return if we found our match
|
56
|
+
return requisites if atom_types.empty?
|
57
|
+
|
58
|
+
# Return nil if we didn't find the match at this nesting level
|
59
|
+
return unless requisites
|
60
|
+
|
61
|
+
# ...otherwise drill further down
|
62
|
+
find_atoms_by_path(requisites.flat_map(&:children).compact || [], atom_types)
|
63
|
+
end
|
64
|
+
|
65
|
+
# A file can have multiple tracks. To identify the type it is necessary to check
|
66
|
+
# the field `component_subtype` in the hdlr atom under the trak atom
|
67
|
+
# More details in https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-DontLinkElementID_147
|
68
|
+
def find_video_trak_atom(atoms)
|
69
|
+
trak_atoms = find_atoms_by_path(atoms, ['moov', 'trak'])
|
70
|
+
|
71
|
+
return if trak_atoms.empty?
|
72
|
+
|
73
|
+
trak_atoms.find do |trak_atom|
|
74
|
+
hdlr_atom = find_first_atom_by_path([trak_atom], 'trak', 'mdia', 'hdlr')
|
75
|
+
hdlr_atom.atom_fields[:component_type] == 'mhlr' && hdlr_atom.atom_fields[:component_subtype] == 'vide'
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
50
79
|
def parse_ftyp_atom(io, atom_size)
|
51
80
|
# Subtract 8 for the atom_size+atom_type,
|
52
81
|
# and 8 once more for the major_brand and minor_version. The remaining
|
@@ -194,6 +223,8 @@ class FormatParser::MOOVParser::Decoder
|
|
194
223
|
end
|
195
224
|
|
196
225
|
def parse_meta_atom(io, atom_size)
|
226
|
+
return if atom_size == 0 # this atom can be empty
|
227
|
+
|
197
228
|
parse_hdlr_atom(io, atom_size)
|
198
229
|
end
|
199
230
|
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -20,13 +20,14 @@ class FormatParser::MP3Parser
|
|
20
20
|
|
21
21
|
# We limit the number of MPEG frames we scan
|
22
22
|
# to obtain our duration estimation
|
23
|
-
MAX_FRAMES_TO_SCAN =
|
23
|
+
MAX_FRAMES_TO_SCAN = 500
|
24
24
|
|
25
25
|
# Default frame size for mp3
|
26
26
|
SAMPLES_PER_FRAME = 1152
|
27
27
|
|
28
28
|
# For some edge cases
|
29
29
|
ZIP_LOCAL_ENTRY_SIGNATURE = "PK\x03\x04\x14\x00".b
|
30
|
+
PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
|
30
31
|
|
31
32
|
# Wraps the Tag object returned by ID3Tag in such
|
32
33
|
# a way that a usable JSON representation gets
|
@@ -60,8 +61,12 @@ class FormatParser::MP3Parser
|
|
60
61
|
# To avoid having that happen, we check for the PKZIP signature -
|
61
62
|
# local entry header signature - at the very start of the file.
|
62
63
|
# If the file is too small safe_read will fail too and the parser
|
63
|
-
# will terminate here.
|
64
|
-
|
64
|
+
# will terminate here. Same with PNGs. In the future
|
65
|
+
# we should implement "confidence" for MP3 as of all our formats
|
66
|
+
# it is by far the most lax.
|
67
|
+
header = safe_read(io, 8)
|
68
|
+
return if header.start_with?(ZIP_LOCAL_ENTRY_SIGNATURE)
|
69
|
+
return if header.start_with?(PNG_HEADER_BYTES)
|
65
70
|
|
66
71
|
# Read all the ID3 tags (or at least attempt to)
|
67
72
|
io.seek(0)
|
@@ -131,27 +136,28 @@ class FormatParser::MP3Parser
|
|
131
136
|
# if you have a minute. https://pypi.python.org/pypi/tinytag
|
132
137
|
def parse_mpeg_frames(io)
|
133
138
|
mpeg_frames = []
|
139
|
+
bytes_to_read = 4
|
134
140
|
|
135
141
|
MAX_FRAMES_TO_SCAN.times do |frame_i|
|
136
142
|
# Read through until we can latch onto the 11 sync bits. Read in 4-byte
|
137
143
|
# increments to save on read() calls
|
138
|
-
data = io.read(
|
144
|
+
data = io.read(bytes_to_read)
|
139
145
|
|
140
146
|
# If we are at EOF - stop iterating
|
141
|
-
break unless data && data.bytesize ==
|
147
|
+
break unless data && data.bytesize == bytes_to_read
|
142
148
|
|
143
149
|
# Look for the sync pattern. It can be either the last byte being 0xFF,
|
144
150
|
# or any of the 2 bytes in sequence being 0xFF and > 0xF0.
|
145
151
|
four_bytes = data.unpack('C4')
|
146
152
|
seek_jmp = sync_bytes_offset_in_4_byte_seq(four_bytes)
|
147
153
|
if seek_jmp > 0
|
148
|
-
io.seek(io.pos + seek_jmp)
|
154
|
+
io.seek(io.pos - bytes_to_read + seek_jmp)
|
149
155
|
next
|
150
156
|
end
|
151
157
|
|
152
158
|
# Once we are past that stage we have latched onto a sync frame header
|
153
159
|
sync, conf, bitrate_freq, rest = four_bytes
|
154
|
-
frame_detail = parse_mpeg_frame_header(io.pos -
|
160
|
+
frame_detail = parse_mpeg_frame_header(io.pos - bytes_to_read, sync, conf, bitrate_freq, rest)
|
155
161
|
mpeg_frames << frame_detail
|
156
162
|
|
157
163
|
# There might be a xing header in the first frame that contains
|
@@ -166,7 +172,7 @@ class FormatParser::MP3Parser
|
|
166
172
|
end
|
167
173
|
end
|
168
174
|
if frame_detail.frame_length > 1 # jump over current frame body
|
169
|
-
io.seek(io.pos + frame_detail.frame_length -
|
175
|
+
io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
|
170
176
|
end
|
171
177
|
end
|
172
178
|
[nil, mpeg_frames]
|
@@ -243,16 +249,16 @@ class FormatParser::MP3Parser
|
|
243
249
|
io.seek(xing_offset + 4) # Include the length of "Xing" itself
|
244
250
|
|
245
251
|
# https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
|
246
|
-
header_flags, _ = io.read(4).unpack('
|
252
|
+
header_flags, _ = io.read(4).unpack('i>')
|
247
253
|
frames = byte_count = toc = vbr_scale = nil
|
248
254
|
|
249
|
-
frames = io.read(4).unpack('N1').first if header_flags & 1 # FRAMES FLAG
|
255
|
+
frames = io.read(4).unpack('N1').first if header_flags & 1 != 0 # FRAMES FLAG
|
250
256
|
|
251
|
-
byte_count = io.read(4).unpack('N1').first if header_flags & 2 # BYTES FLAG
|
257
|
+
byte_count = io.read(4).unpack('N1').first if header_flags & 2 != 0 # BYTES FLAG
|
252
258
|
|
253
|
-
toc = io.read(100).unpack('C100') if header_flags & 4 # TOC FLAG
|
259
|
+
toc = io.read(100).unpack('C100') if header_flags & 4 != 0 # TOC FLAG
|
254
260
|
|
255
|
-
vbr_scale = io.read(4).unpack('N1').first if header_flags & 8 # VBR SCALE FLAG
|
261
|
+
vbr_scale = io.read(4).unpack('N1').first if header_flags & 8 != 0 # VBR SCALE FLAG
|
256
262
|
|
257
263
|
VBRHeader.new(frames: frames, byte_count: byte_count, toc_entries: toc, vbr_scale: vbr_scale)
|
258
264
|
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module FormatParser::MP3Parser::ID3Extraction
|
2
2
|
ID3V1_TAG_SIZE_BYTES = 128
|
3
|
-
|
3
|
+
# it supports 2.4.x, 2.3.x, 2.2.x which are supported by the gem id3tag
|
4
|
+
# see https://id3.org/Developer%20Information for more details of each version
|
5
|
+
ID3V2_MINOR_TAG_VERSIONS = [2, 3, 4]
|
4
6
|
MAX_SIZE_FOR_ID3V2 = 1 * 1024 * 1024
|
5
7
|
|
6
8
|
extend FormatParser::IOUtils
|
@@ -22,7 +24,7 @@ module FormatParser::MP3Parser::ID3Extraction
|
|
22
24
|
io.seek(0) # Only support header ID3v2
|
23
25
|
header = parse_id3_v2_header(io)
|
24
26
|
return unless header[:tag] == 'ID3' && header[:size] > 0
|
25
|
-
return unless
|
27
|
+
return unless ID3V2_MINOR_TAG_VERSIONS.include?(header[:version].unpack('C').first)
|
26
28
|
|
27
29
|
id3_tag_size = io.pos + header[:size]
|
28
30
|
|
@@ -108,4 +108,24 @@ describe FormatParser::MOOVParser do
|
|
108
108
|
it 'provides filename hints' do
|
109
109
|
expect(subject).to be_likely_match('file.m4v')
|
110
110
|
end
|
111
|
+
|
112
|
+
it 'reads correctly the video dimensions' do
|
113
|
+
mov_path = fixtures_dir + '/MOOV/MOV/Test_Dimensions.mov'
|
114
|
+
|
115
|
+
result = subject.call(File.open(mov_path, 'rb'))
|
116
|
+
|
117
|
+
expect(result).not_to be_nil
|
118
|
+
expect(result.nature).to eq(:video)
|
119
|
+
expect(result.format).to eq(:mov)
|
120
|
+
expect(result.width_px).to eq(640)
|
121
|
+
expect(result.height_px).to eq(360)
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'does not raise error when a meta atom has size 0' do
|
125
|
+
mov_path = fixtures_dir + '/MOOV/MOV/Test_Meta_Atom_With_Size_Zero.mov'
|
126
|
+
|
127
|
+
result = subject.call(File.open(mov_path, 'rb'))
|
128
|
+
expect(result).not_to be_nil
|
129
|
+
expect(result.format).to eq(:mov)
|
130
|
+
end
|
111
131
|
end
|
@@ -15,6 +15,26 @@ describe FormatParser::MP3Parser do
|
|
15
15
|
expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
|
16
16
|
end
|
17
17
|
|
18
|
+
it 'reads the Xing header without raising errors' do
|
19
|
+
fpath = fixtures_dir + '/MP3/test_xing_header.mp3'
|
20
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
21
|
+
|
22
|
+
expect(parsed).not_to be_nil
|
23
|
+
|
24
|
+
expect(parsed.nature).to eq(:audio)
|
25
|
+
expect(parsed.format).to eq(:mp3)
|
26
|
+
expect(parsed.num_audio_channels).to eq(2)
|
27
|
+
expect(parsed.audio_sample_rate_hz).to eq(48000)
|
28
|
+
expect(parsed.intrinsics).not_to be_nil
|
29
|
+
expect(parsed.media_duration_seconds).to be_within(0.1).of(0.0342)
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'does not misdetect a PNG' do
|
33
|
+
fpath = fixtures_dir + '/PNG/anim.png'
|
34
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
35
|
+
expect(parsed).to be_nil
|
36
|
+
end
|
37
|
+
|
18
38
|
describe 'title/artist/album attributes' do
|
19
39
|
let(:parsed) { subject.call(File.open(fpath, 'rb')) }
|
20
40
|
|
@@ -67,7 +87,7 @@ describe FormatParser::MP3Parser do
|
|
67
87
|
|
68
88
|
large_syncsfe_size = [ID3Tag::SynchsafeInteger.encode(more_bytes_than_permitted)].pack('N')
|
69
89
|
prepped = StringIO.new(
|
70
|
-
'ID3' + "\
|
90
|
+
'ID3' + "\x03\x00".b + "\x00".b + large_syncsfe_size + gunk
|
71
91
|
)
|
72
92
|
|
73
93
|
expect(ID3Tag).not_to receive(:read)
|
@@ -90,6 +110,14 @@ describe FormatParser::MP3Parser do
|
|
90
110
|
expect(parsed.title).to eq('test')
|
91
111
|
end
|
92
112
|
|
113
|
+
it 'reads the mpeg frames correctly' do
|
114
|
+
fpath = fixtures_dir + '/MP3/test_read_frames.mp3'
|
115
|
+
|
116
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
117
|
+
|
118
|
+
expect(parsed.audio_sample_rate_hz). to eq(48000)
|
119
|
+
end
|
120
|
+
|
93
121
|
it 'parses the Cassy MP3' do
|
94
122
|
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
95
123
|
parsed = subject.call(File.open(fpath, 'rb'))
|
@@ -130,6 +158,14 @@ describe FormatParser::MP3Parser do
|
|
130
158
|
}.to raise_error(FormatParser::IOUtils::InvalidRead)
|
131
159
|
end
|
132
160
|
|
161
|
+
it 'supports id3 v2.4.x' do
|
162
|
+
fpath = fixtures_dir + '/MP3/id3v24.mp3'
|
163
|
+
|
164
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
165
|
+
|
166
|
+
expect(parsed.artist). to eq('wetransfer')
|
167
|
+
end
|
168
|
+
|
133
169
|
describe '#as_json' do
|
134
170
|
it 'converts all hash keys to string when stringify_keys: true' do
|
135
171
|
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.25.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2020-
|
12
|
+
date: 2020-10-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
version: '1'
|
35
35
|
- - ">="
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 1.3.
|
37
|
+
version: 1.3.8
|
38
38
|
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,21 +44,21 @@ dependencies:
|
|
44
44
|
version: '1'
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.3.
|
47
|
+
version: 1.3.8
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: id3tag
|
50
50
|
requirement: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.
|
54
|
+
version: '0.14'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
57
|
version_requirements: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0.
|
61
|
+
version: '0.14'
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: faraday
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
292
292
|
- !ruby/object:Gem::Version
|
293
293
|
version: '0'
|
294
294
|
requirements: []
|
295
|
-
rubygems_version: 3.
|
295
|
+
rubygems_version: 3.1.2
|
296
296
|
signing_key:
|
297
297
|
specification_version: 4
|
298
298
|
summary: A library for efficient parsing of file metadata
|