format_parser 2.1.0 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -178,7 +178,7 @@ class FormatParser::JPEGParser
178
178
 
179
179
  def skip_frame
180
180
  length = read_short - 2
181
- safe_skip(@buf, length)
181
+ skip_bytes(length)
182
182
  end
183
183
 
184
184
  FormatParser.register_parser self, natures: :image, formats: :jpg, priority: 0
@@ -63,7 +63,7 @@ class FormatParser::MOOVParser::Decoder
63
63
  end
64
64
 
65
65
  # A file can have multiple tracks. To identify the type it is necessary to check
66
- # the fields `omponent_subtype` in hdlr atom under the trak atom
66
+ # the fields `component_subtype` in hdlr atom under the trak atom
67
67
  # More details in https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-DontLinkElementID_147
68
68
  def find_video_trak_atom(atoms)
69
69
  trak_atoms = find_atoms_by_path(atoms, ['moov', 'trak'])
@@ -6,9 +6,9 @@ class FormatParser::MOOVParser
6
6
  # we can reasonably call "file type" (something
7
7
  # usable as a filename extension)
8
8
  FTYP_MAP = {
9
- 'qt ' => :mov,
10
- 'mp4 ' => :mp4,
11
9
  'm4a ' => :m4a,
10
+ 'mp4 ' => :mp4,
11
+ 'qt ' => :mov,
12
12
  }
13
13
 
14
14
  # https://tools.ietf.org/html/rfc4337#section-2
@@ -18,7 +18,7 @@ class FormatParser::MOOVParser
18
18
  MP4_MIXED_MIME_TYPE = 'video/mp4'
19
19
 
20
20
  def likely_match?(filename)
21
- filename =~ /\.(mov|m4a|ma4|mp4|aac|m4v)$/i
21
+ filename =~ /\.(m4a|m4v|ma4|mov|mp4)$/i
22
22
  end
23
23
 
24
24
  def call(io)
@@ -42,7 +42,8 @@ class FormatParser::MOOVParser
42
42
  end
43
43
 
44
44
  ftyp_atom = decoder.find_first_atom_by_path(atom_tree, 'ftyp')
45
- file_type = ftyp_atom.field_value(:major_brand)
45
+ file_type = ftyp_atom&.field_value(:major_brand)
46
+ format = format_from_moov_type(file_type)
46
47
 
47
48
  # Try to find the width and height in the tkhd
48
49
  width, height = parse_dimensions(decoder, atom_tree)
@@ -55,17 +56,16 @@ class FormatParser::MOOVParser
55
56
  end
56
57
 
57
58
  # M4A only contains audio, while MP4 and friends can contain video.
58
- fmt = format_from_moov_type(file_type)
59
- if fmt == :m4a
59
+ if format == :m4a
60
60
  FormatParser::Audio.new(
61
- format: format_from_moov_type(file_type),
61
+ format: format,
62
62
  media_duration_seconds: media_duration_s,
63
63
  content_type: MP4_AU_MIME_TYPE,
64
64
  intrinsics: atom_tree,
65
65
  )
66
66
  else
67
67
  FormatParser::Video.new(
68
- format: format_from_moov_type(file_type),
68
+ format: format,
69
69
  width_px: width,
70
70
  height_px: height,
71
71
  frame_rate: parse_time_to_sample_atom(decoder, atom_tree)&.truncate(2),
@@ -161,5 +161,5 @@ class FormatParser::MOOVParser
161
161
  end
162
162
  end
163
163
 
164
- FormatParser.register_parser new, natures: :video, formats: FTYP_MAP.values, priority: 3
164
+ FormatParser.register_parser new, natures: [:audio, :video], formats: FTYP_MAP.values, priority: 3
165
165
  end
@@ -54,7 +54,7 @@ class FormatParser::WebpParser
54
54
  # Encoded as a single VP8 key frame - a 10-byte uncompressed chunk followed by 2+ partitions of compressed data.
55
55
  # The first 6 bytes of this chunk contains information that is mostly relevant when using VP8 as a video
56
56
  # compression format, and can be ignored.
57
- safe_skip(@buf, 6)
57
+ skip_bytes(6)
58
58
 
59
59
  # The subsequent 4 bytes contain the image width and height, respectively, as 16-bit unsigned little endian
60
60
  # integers.
@@ -64,7 +64,7 @@ class FormatParser::WebpParser
64
64
 
65
65
  def read_lossless_data
66
66
  # There is a single byte signature, 0x2F, that we can disregard.
67
- safe_skip(@buf, 1)
67
+ skip_bytes(1)
68
68
 
69
69
  # The subsequent 4 bytes contain the image width and height, respectively, as 14-bit unsigned little endian
70
70
  # integers (minus one). The 4 remaining bits consist of a 1-bit flag indicating whether alpha is used, and a 3-bit
@@ -100,7 +100,7 @@ class FormatParser::WebpParser
100
100
 
101
101
  # The flags are followed by three reserved bytes of zeros, and then by the width and height, respectively - each
102
102
  # occupying three bytes and each one less than the actual canvas measurements.
103
- safe_skip(@buf, 3)
103
+ skip_bytes(3)
104
104
  dimensions = safe_read(@buf, 6).unpack('VS')
105
105
  width = (dimensions[0] & 0xffffff) + 1
106
106
  height = (dimensions[0] >> 24 | dimensions[1] << 8 & 0xffffff) + 1
@@ -134,7 +134,7 @@ class FormatParser::WebpParser
134
134
  end
135
135
 
136
136
  # Padding byte of 0 added if chunk size is odd.
137
- safe_skip(@buf, 1) if chunk_size.odd?
137
+ skip_bytes(1) if chunk_size.odd?
138
138
 
139
139
  case fourcc
140
140
  when 'EXIF'
@@ -155,9 +155,9 @@ class FormatParser::WebpParser
155
155
  intrinsics[:xmp] ||= safe_read(@buf, chunk_size)
156
156
  when 'ANMF'
157
157
  num_frames += 1 if image.has_multiple_frames
158
- safe_skip(@buf, chunk_size)
158
+ skip_bytes(chunk_size)
159
159
  else
160
- safe_skip(@buf, chunk_size)
160
+ skip_bytes(chunk_size)
161
161
  end
162
162
  end
163
163
 
@@ -177,14 +177,16 @@ describe FormatParser do
177
177
  it 'sorts the parsers by priority and name' do
178
178
  parsers = FormatParser.parsers_for(
179
179
  [:audio, :image],
180
- [:cr2, :dpx, :fdx, :flac, :gif, :jpg, :mov, :mp4, :m4a, :mp3, :mpg, :mpeg, :ogg, :png, :tif, :wav]
180
+ [:cr2, :cr3, :dpx, :fdx, :flac, :gif, :jpg, :mov, :mp4, :m4a, :mp3, :mpg, :mpeg, :ogg, :png, :tif, :wav]
181
181
  )
182
182
 
183
183
  expect(parsers.map { |parser| parser.class.name }).to eq([
184
184
  'FormatParser::GIFParser',
185
185
  'Class',
186
186
  'FormatParser::PNGParser',
187
+ 'FormatParser::MOOVParser',
187
188
  'FormatParser::CR2Parser',
189
+ 'FormatParser::CR3Parser',
188
190
  'FormatParser::DPXParser',
189
191
  'FormatParser::FLACParser',
190
192
  'FormatParser::MP3Parser',
@@ -0,0 +1,58 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::CR3Parser do
4
+ it 'should match valid CR3 file extensions' do
5
+ valid_extensions = %w[cr3 cR3 Cr3 CR3]
6
+ valid_extensions.each { |extension| expect(subject.likely_match?("foo.#{extension}")).to be_truthy }
7
+ end
8
+
9
+ it 'should not match invalid CR3 file extensions' do
10
+ invalid_filenames = ['', 'foo', 'cr3', 'foocr3', 'foo.cr2', 'foo.cr3.bar']
11
+ invalid_filenames.each { |filename| expect(subject.likely_match?(filename)).to be_falsey }
12
+ end
13
+
14
+ it 'should not parse a file that does not match the CR3 definition' do
15
+ # MOV files are closely related to CR3 files (both extend the ISO Base File Format), so this is a decent edge case
16
+ # to ensure only true CR3 files are being parsed.
17
+ result = subject.call(File.open(fixtures_dir + '/MOOV/MOV/Test_Dimensions.mov'))
18
+ expect(result).to be_nil
19
+ end
20
+
21
+ it 'should return nil if no CMT1 atom is present' do
22
+ # This is a MOV file with the ftyp header modified to masquerade as a CR3 file. It is therefore missing the
23
+ # CR3-specific CMT1 atom containing the image metadata.
24
+ result = subject.call(File.open(fixtures_dir + '/CR3/invalid'))
25
+ expect(result).to be_nil
26
+ end
27
+
28
+ Dir.glob(fixtures_dir + '/CR3/*.cr3').sort.each do |file_path|
29
+ it "successfully parses #{File.basename(file_path)}" do
30
+ result = subject.call(File.open(file_path, 'rb'))
31
+
32
+ expect(result).not_to be_nil
33
+ expect(result.nature).to eq(:image)
34
+ expect(result.width_px).to be > 0
35
+ expect(result.height_px).to be > 0
36
+ expect(result.content_type).to eq('image/x-canon-cr3')
37
+ expect(result.intrinsics).not_to be_nil
38
+ end
39
+ end
40
+
41
+ it 'parses the necessary metadata from a CR3 file' do
42
+ file_path = fixtures_dir + '/CR3/Canon EOS R10 (RAW).CR3'
43
+
44
+ result = subject.call(File.open(file_path, 'rb'))
45
+ expect(result.nature).to eq(:image)
46
+ expect(result.width_px).to eq(6000)
47
+ expect(result.height_px).to eq(4000)
48
+ expect(result.orientation).to eq(:top_left)
49
+ expect(result.display_width_px).to eq(6000)
50
+ expect(result.display_height_px).to eq(4000)
51
+ expect(result.content_type).to eq('image/x-canon-cr3')
52
+ expect(result.intrinsics).not_to be_nil
53
+ expect(result.intrinsics[:atom_tree]).not_to be_nil
54
+ expect(result.intrinsics[:exif]).not_to be_nil
55
+ expect(result.intrinsics[:exif][:image_length]).to eq(result.height_px)
56
+ expect(result.intrinsics[:exif][:image_width]).to eq(result.width_px)
57
+ end
58
+ end
@@ -0,0 +1,242 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::ISOBaseMediaFileFormat::Decoder do
4
+ context 'when build_atom_tree is called' do
5
+ context 'with no io' do
6
+ it 'raises an error' do
7
+ expect { subject.build_atom_tree(0x0) }.to raise_error(/IO missing - supply a valid IO object/)
8
+ end
9
+ end
10
+
11
+ context('with a max_read smaller than the length of the IO') do
12
+ let(:io) do
13
+ # moov
14
+ # moov
15
+ input = [0x8].pack('N') + 'moov' + [0x8].pack('N') + 'moov'
16
+ StringIO.new(input)
17
+ end
18
+
19
+ it 'stops building the tree' do
20
+ expect(subject.build_atom_tree(0x0, io).length).to eq(0)
21
+ expect(io.pos).to eq(0)
22
+
23
+ expect(subject.build_atom_tree(0x8, io).length).to eq(1)
24
+ expect(io.pos).to eq(0x8)
25
+ io.seek(0)
26
+
27
+ expect(subject.build_atom_tree(0x10, io).length).to eq(2)
28
+ expect(io.pos).to eq(0x10)
29
+ end
30
+ end
31
+
32
+ context 'when parsing an unknown atom' do
33
+ let(:io) do
34
+ # foo
35
+ # |-> moov
36
+ input = [0x14].pack('N') + 'foo ' + [0x1].pack('N') + [0x8].pack('N') + 'moov'
37
+ StringIO.new(input)
38
+ end
39
+
40
+ it 'parses only the type, position and size, and skips any fields and children' do
41
+ result = subject.build_atom_tree(0xFF, io)
42
+ expect(result.length).to eq(1)
43
+ expect(io.pos).to eq(0x14)
44
+
45
+ foo_atom = result[0]
46
+ expect(foo_atom.type).to eq('foo ')
47
+ expect(foo_atom.position).to eq(0)
48
+ expect(foo_atom.size).to eq(0x14)
49
+ expect(foo_atom.fields).to eq({})
50
+ expect(foo_atom.children).to eq([])
51
+ end
52
+ end
53
+
54
+ context 'when parsing a container atom' do
55
+ let(:io) do
56
+ # moov
57
+ # |-> foo
58
+ # |-> bar
59
+ input = [0x18].pack('N') + 'moov' + [0x8].pack('N') + 'foo ' + [0x8].pack('N') + 'bar '
60
+ StringIO.new(input)
61
+ end
62
+
63
+ it 'parses type, position, size and children' do
64
+ result = subject.build_atom_tree(0xFF, io)
65
+ expect(result.length).to eq(1)
66
+ expect(io.pos).to eq(0x18)
67
+
68
+ moov_atom = result[0]
69
+ expect(moov_atom.type).to eq('moov')
70
+ expect(moov_atom.position).to eq(0)
71
+ expect(moov_atom.size).to eq(0x18)
72
+ expect(moov_atom.fields).to eq({})
73
+ expect(moov_atom.children.length).to eq(2)
74
+ end
75
+ end
76
+
77
+ context 'when parsing an empty atom' do
78
+ let(:io) do
79
+ # nmhd
80
+ # |-> foo
81
+ input = [0x18].pack('N') + 'nmhd' + [0x1].pack('c') + 'fla' + [0x2].pack('N') + [0x8].pack('N') + 'foo '
82
+ StringIO.new(input)
83
+ end
84
+
85
+ it 'parses type, position, size, version and flags, and skips any other fields or children' do
86
+ result = subject.build_atom_tree(0xFF, io)
87
+ expect(result.length).to eq(1)
88
+ expect(io.pos).to eq(0x18)
89
+
90
+ nmhd_atom = result[0]
91
+ expect(nmhd_atom.type).to eq('nmhd')
92
+ expect(nmhd_atom.position).to eq(0)
93
+ expect(nmhd_atom.size).to eq(0x18)
94
+ expect(nmhd_atom.fields).to include({
95
+ version: 1,
96
+ flags: 'fla'
97
+ })
98
+ expect(nmhd_atom.children).to eq([])
99
+ end
100
+ end
101
+
102
+ context 'when parsing a uuid atom' do
103
+ let(:usertype) { '90f7c66ec2db476b977461e796f0dd4b' }
104
+ let(:io) do
105
+ input = [0x20].pack('N') + 'uuid' + [usertype].pack('H*') + [0x8].pack('N') + 'foo '
106
+ StringIO.new(input)
107
+ end
108
+
109
+ it 'parses type, position, size and usertype, and skips any other fields or children' do
110
+ # uuid
111
+ # |-> foo
112
+ result = subject.build_atom_tree(0xFF, io)
113
+ expect(result.length).to eq(1)
114
+ expect(io.pos).to eq(0x20)
115
+
116
+ nmhd_atom = result[0]
117
+ expect(nmhd_atom.type).to eq('uuid')
118
+ expect(nmhd_atom.position).to eq(0)
119
+ expect(nmhd_atom.size).to eq(0x20)
120
+ expect(nmhd_atom.fields).to include({
121
+ usertype: usertype,
122
+ })
123
+ expect(nmhd_atom.children).to eq([])
124
+ end
125
+ end
126
+ end
127
+ end
128
+
129
+ describe FormatParser::ISOBaseMediaFileFormat::Decoder::Atom do
130
+ context 'when initialized' do
131
+ context 'without fields and/or children' do
132
+ subject { described_class.new('foo', 0, 0) }
133
+
134
+ it 'sets them as an empty array/hash' do
135
+ expect(subject.type).to eq('foo')
136
+ expect(subject.position).to eq(0)
137
+ expect(subject.size).to eq(0)
138
+ expect(subject.fields).to eq({})
139
+ expect(subject.children).to eq([])
140
+ end
141
+ end
142
+
143
+ context 'with fields and/or children' do
144
+ let(:fields) { { foo: 1, bar: 'bar' } }
145
+ let(:children) { [described_class.new('bar', 0, 0)] }
146
+
147
+ subject { described_class.new('foo', 0, 0, fields, children) }
148
+
149
+ it 'sets them correctly' do
150
+ expect(subject.type).to eq('foo')
151
+ expect(subject.position).to eq(0)
152
+ expect(subject.size).to eq(0)
153
+ expect(subject.fields).to eq(fields)
154
+ expect(subject.children).to eq(children)
155
+ end
156
+ end
157
+ end
158
+
159
+ context 'when find_first_descendent is called' do
160
+ context 'with no children' do
161
+ subject { described_class.new('root', 0, 0) }
162
+ it 'returns nil' do
163
+ expect(subject.find_first_descendent(%w[root foo])).to be_nil
164
+ end
165
+ end
166
+
167
+ context 'with no descendents of the given type(s)' do
168
+ subject do
169
+ described_class.new('root', 0, 0, nil, [
170
+ described_class.new('foo', 0, 0),
171
+ described_class.new('bar', 0, 0, nil, [
172
+ described_class.new('baz', 0, 0)
173
+ ])
174
+ ])
175
+ end
176
+
177
+ it 'returns nil' do
178
+ expect(subject.find_first_descendent(%w[root qux])).to be_nil
179
+ end
180
+ end
181
+
182
+ context 'with multiple descendents of the given type(s)' do
183
+ let(:descendent) { described_class.new('bar', 0, 0) }
184
+
185
+ subject do
186
+ described_class.new('root', 0, 0, nil, [
187
+ described_class.new('foo', 0, 0, nil, [
188
+ descendent
189
+ ]),
190
+ described_class.new('bar', 0, 0),
191
+ ])
192
+ end
193
+
194
+ it 'returns the first relevant descendent in order of appearance' do
195
+ expect(subject.find_first_descendent(%w[bar])).to be(descendent)
196
+ end
197
+ end
198
+ end
199
+
200
+ context 'when select_descendents is called' do
201
+ context 'with no children' do
202
+ subject { described_class.new('root', 0, 0) }
203
+ it 'returns an empty array' do
204
+ expect(subject.select_descendents(%w[root foo])).to eq([])
205
+ end
206
+ end
207
+
208
+ context 'with no descendents of the given type(s)' do
209
+ subject do
210
+ described_class.new('root', 0, 0, nil, [
211
+ described_class.new('foo', 0, 0),
212
+ described_class.new('bar', 0, 0, nil, [
213
+ described_class.new('baz', 0, 0)
214
+ ])
215
+ ])
216
+ end
217
+
218
+ it 'returns an empty array' do
219
+ expect(subject.select_descendents(%w[root qux])).to eq([])
220
+ end
221
+ end
222
+
223
+ context 'with multiple descendents of the given type(s)' do
224
+ let(:descendent_1) { described_class.new('bar', 0, 0) }
225
+ let(:descendent_3) { described_class.new('bar', 20, 20) }
226
+ let(:descendent_2) { described_class.new('baz', 10, 10, nil, [descendent_3]) }
227
+
228
+ subject do
229
+ described_class.new('root', 0, 0, nil, [
230
+ described_class.new('foo', 0, 0, nil, [
231
+ descendent_1
232
+ ]),
233
+ descendent_2,
234
+ ])
235
+ end
236
+
237
+ it 'returns all relevant descendents' do
238
+ expect(subject.select_descendents(%w[bar baz])).to match_array([descendent_1, descendent_2, descendent_3])
239
+ end
240
+ end
241
+ end
242
+ end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 2.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
8
8
  - Julik Tarkhanov
9
- autorequire:
9
+ autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2022-11-07 00:00:00.000000000 Z
12
+ date: 2022-11-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: exifr
@@ -195,6 +195,8 @@ files:
195
195
  - lib/parsers/arw_parser.rb
196
196
  - lib/parsers/bmp_parser.rb
197
197
  - lib/parsers/cr2_parser.rb
198
+ - lib/parsers/cr3_parser.rb
199
+ - lib/parsers/cr3_parser/decoder.rb
198
200
  - lib/parsers/dpx_parser.rb
199
201
  - lib/parsers/dpx_parser/dpx_structs.rb
200
202
  - lib/parsers/exif_parser.rb
@@ -202,6 +204,7 @@ files:
202
204
  - lib/parsers/flac_parser.rb
203
205
  - lib/parsers/gif_parser.rb
204
206
  - lib/parsers/heif_parser.rb
207
+ - lib/parsers/iso_base_media_file_format/decoder.rb
205
208
  - lib/parsers/jpeg_parser.rb
206
209
  - lib/parsers/m3u_parser.rb
207
210
  - lib/parsers/moov_parser.rb
@@ -243,12 +246,14 @@ files:
243
246
  - spec/parsers/arw_parser_spec.rb
244
247
  - spec/parsers/bmp_parser_spec.rb
245
248
  - spec/parsers/cr2_parser_spec.rb
249
+ - spec/parsers/cr3_parser_spec.rb
246
250
  - spec/parsers/dpx_parser_spec.rb
247
251
  - spec/parsers/exif_parser_spec.rb
248
252
  - spec/parsers/fdx_parser_spec.rb
249
253
  - spec/parsers/flac_parser_spec.rb
250
254
  - spec/parsers/gif_parser_spec.rb
251
255
  - spec/parsers/heif_parser_spec.rb
256
+ - spec/parsers/iso_base_media_file_format/decoder_spec.rb
252
257
  - spec/parsers/jpeg_parser_spec.rb
253
258
  - spec/parsers/m3u_parser_spec.rb
254
259
  - spec/parsers/moov_parser_spec.rb
@@ -273,7 +278,7 @@ licenses:
273
278
  - MIT (Hippocratic)
274
279
  metadata:
275
280
  allowed_push_host: https://rubygems.org
276
- post_install_message:
281
+ post_install_message:
277
282
  rdoc_options: []
278
283
  require_paths:
279
284
  - lib
@@ -288,8 +293,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
288
293
  - !ruby/object:Gem::Version
289
294
  version: '0'
290
295
  requirements: []
291
- rubygems_version: 3.3.17
292
- signing_key:
296
+ rubygems_version: 3.3.7
297
+ signing_key:
293
298
  specification_version: 4
294
299
  summary: A library for efficient parsing of file metadata
295
300
  test_files: []