format_parser 2.1.0 → 2.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -178,7 +178,7 @@ class FormatParser::JPEGParser
178
178
 
179
179
  def skip_frame
180
180
  length = read_short - 2
181
- safe_skip(@buf, length)
181
+ skip_bytes(length)
182
182
  end
183
183
 
184
184
  FormatParser.register_parser self, natures: :image, formats: :jpg, priority: 0
@@ -63,7 +63,7 @@ class FormatParser::MOOVParser::Decoder
63
63
  end
64
64
 
65
65
  # A file can have multiple tracks. To identify the type it is necessary to check
66
- # the fields `omponent_subtype` in hdlr atom under the trak atom
66
+ # the fields `component_subtype` in hdlr atom under the trak atom
67
67
  # More details in https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-DontLinkElementID_147
68
68
  def find_video_trak_atom(atoms)
69
69
  trak_atoms = find_atoms_by_path(atoms, ['moov', 'trak'])
@@ -6,9 +6,9 @@ class FormatParser::MOOVParser
6
6
  # we can reasonably call "file type" (something
7
7
  # usable as a filename extension)
8
8
  FTYP_MAP = {
9
- 'qt ' => :mov,
10
- 'mp4 ' => :mp4,
11
9
  'm4a ' => :m4a,
10
+ 'mp4 ' => :mp4,
11
+ 'qt ' => :mov,
12
12
  }
13
13
 
14
14
  # https://tools.ietf.org/html/rfc4337#section-2
@@ -18,7 +18,7 @@ class FormatParser::MOOVParser
18
18
  MP4_MIXED_MIME_TYPE = 'video/mp4'
19
19
 
20
20
  def likely_match?(filename)
21
- filename =~ /\.(mov|m4a|ma4|mp4|aac|m4v)$/i
21
+ filename =~ /\.(m4a|m4v|ma4|mov|mp4)$/i
22
22
  end
23
23
 
24
24
  def call(io)
@@ -42,7 +42,8 @@ class FormatParser::MOOVParser
42
42
  end
43
43
 
44
44
  ftyp_atom = decoder.find_first_atom_by_path(atom_tree, 'ftyp')
45
- file_type = ftyp_atom.field_value(:major_brand)
45
+ file_type = ftyp_atom&.field_value(:major_brand)
46
+ format = format_from_moov_type(file_type)
46
47
 
47
48
  # Try to find the width and height in the tkhd
48
49
  width, height = parse_dimensions(decoder, atom_tree)
@@ -55,17 +56,16 @@ class FormatParser::MOOVParser
55
56
  end
56
57
 
57
58
  # M4A only contains audio, while MP4 and friends can contain video.
58
- fmt = format_from_moov_type(file_type)
59
- if fmt == :m4a
59
+ if format == :m4a
60
60
  FormatParser::Audio.new(
61
- format: format_from_moov_type(file_type),
61
+ format: format,
62
62
  media_duration_seconds: media_duration_s,
63
63
  content_type: MP4_AU_MIME_TYPE,
64
64
  intrinsics: atom_tree,
65
65
  )
66
66
  else
67
67
  FormatParser::Video.new(
68
- format: format_from_moov_type(file_type),
68
+ format: format,
69
69
  width_px: width,
70
70
  height_px: height,
71
71
  frame_rate: parse_time_to_sample_atom(decoder, atom_tree)&.truncate(2),
@@ -161,5 +161,5 @@ class FormatParser::MOOVParser
161
161
  end
162
162
  end
163
163
 
164
- FormatParser.register_parser new, natures: :video, formats: FTYP_MAP.values, priority: 3
164
+ FormatParser.register_parser new, natures: [:audio, :video], formats: FTYP_MAP.values, priority: 3
165
165
  end
@@ -54,7 +54,7 @@ class FormatParser::WebpParser
54
54
  # Encoded as a single VP8 key frame - a 10-byte uncompressed chunk followed by 2+ partitions of compressed data.
55
55
  # The first 6 bytes of this chunk contains information that is mostly relevant when using VP8 as a video
56
56
  # compression format, and can be ignored.
57
- safe_skip(@buf, 6)
57
+ skip_bytes(6)
58
58
 
59
59
  # The subsequent 4 bytes contain the image width and height, respectively, as 16-bit unsigned little endian
60
60
  # integers.
@@ -64,7 +64,7 @@ class FormatParser::WebpParser
64
64
 
65
65
  def read_lossless_data
66
66
  # There is a single byte signature, 0x2F, that we can disregard.
67
- safe_skip(@buf, 1)
67
+ skip_bytes(1)
68
68
 
69
69
  # The subsequent 4 bytes contain the image width and height, respectively, as 14-bit unsigned little endian
70
70
  # integers (minus one). The 4 remaining bits consist of a 1-bit flag indicating whether alpha is used, and a 3-bit
@@ -100,7 +100,7 @@ class FormatParser::WebpParser
100
100
 
101
101
  # The flags are followed by three reserved bytes of zeros, and then by the width and height, respectively - each
102
102
  # occupying three bytes and each one less than the actual canvas measurements.
103
- safe_skip(@buf, 3)
103
+ skip_bytes(3)
104
104
  dimensions = safe_read(@buf, 6).unpack('VS')
105
105
  width = (dimensions[0] & 0xffffff) + 1
106
106
  height = (dimensions[0] >> 24 | dimensions[1] << 8 & 0xffffff) + 1
@@ -134,7 +134,7 @@ class FormatParser::WebpParser
134
134
  end
135
135
 
136
136
  # Padding byte of 0 added if chunk size is odd.
137
- safe_skip(@buf, 1) if chunk_size.odd?
137
+ skip_bytes(1) if chunk_size.odd?
138
138
 
139
139
  case fourcc
140
140
  when 'EXIF'
@@ -155,9 +155,9 @@ class FormatParser::WebpParser
155
155
  intrinsics[:xmp] ||= safe_read(@buf, chunk_size)
156
156
  when 'ANMF'
157
157
  num_frames += 1 if image.has_multiple_frames
158
- safe_skip(@buf, chunk_size)
158
+ skip_bytes(chunk_size)
159
159
  else
160
- safe_skip(@buf, chunk_size)
160
+ skip_bytes(chunk_size)
161
161
  end
162
162
  end
163
163
 
@@ -177,14 +177,16 @@ describe FormatParser do
177
177
  it 'sorts the parsers by priority and name' do
178
178
  parsers = FormatParser.parsers_for(
179
179
  [:audio, :image],
180
- [:cr2, :dpx, :fdx, :flac, :gif, :jpg, :mov, :mp4, :m4a, :mp3, :mpg, :mpeg, :ogg, :png, :tif, :wav]
180
+ [:cr2, :cr3, :dpx, :fdx, :flac, :gif, :jpg, :mov, :mp4, :m4a, :mp3, :mpg, :mpeg, :ogg, :png, :tif, :wav]
181
181
  )
182
182
 
183
183
  expect(parsers.map { |parser| parser.class.name }).to eq([
184
184
  'FormatParser::GIFParser',
185
185
  'Class',
186
186
  'FormatParser::PNGParser',
187
+ 'FormatParser::MOOVParser',
187
188
  'FormatParser::CR2Parser',
189
+ 'FormatParser::CR3Parser',
188
190
  'FormatParser::DPXParser',
189
191
  'FormatParser::FLACParser',
190
192
  'FormatParser::MP3Parser',
@@ -0,0 +1,58 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::CR3Parser do
4
+ it 'should match valid CR3 file extensions' do
5
+ valid_extensions = %w[cr3 cR3 Cr3 CR3]
6
+ valid_extensions.each { |extension| expect(subject.likely_match?("foo.#{extension}")).to be_truthy }
7
+ end
8
+
9
+ it 'should not match invalid CR3 file extensions' do
10
+ invalid_filenames = ['', 'foo', 'cr3', 'foocr3', 'foo.cr2', 'foo.cr3.bar']
11
+ invalid_filenames.each { |filename| expect(subject.likely_match?(filename)).to be_falsey }
12
+ end
13
+
14
+ it 'should not parse a file that does not match the CR3 definition' do
15
+ # MOV files are closely related to CR3 files (both extend the ISO Base File Format), so this is a decent edge case
16
+ # to ensure only true CR3 files are being parsed.
17
+ result = subject.call(File.open(fixtures_dir + '/MOOV/MOV/Test_Dimensions.mov'))
18
+ expect(result).to be_nil
19
+ end
20
+
21
+ it 'should return nil if no CMT1 atom is present' do
22
+ # This is a MOV file with the ftyp header modified to masquerade as a CR3 file. It is therefore missing the
23
+ # CR3-specific CMT1 atom containing the image metadata.
24
+ result = subject.call(File.open(fixtures_dir + '/CR3/invalid'))
25
+ expect(result).to be_nil
26
+ end
27
+
28
+ Dir.glob(fixtures_dir + '/CR3/*.cr3').sort.each do |file_path|
29
+ it "successfully parses #{File.basename(file_path)}" do
30
+ result = subject.call(File.open(file_path, 'rb'))
31
+
32
+ expect(result).not_to be_nil
33
+ expect(result.nature).to eq(:image)
34
+ expect(result.width_px).to be > 0
35
+ expect(result.height_px).to be > 0
36
+ expect(result.content_type).to eq('image/x-canon-cr3')
37
+ expect(result.intrinsics).not_to be_nil
38
+ end
39
+ end
40
+
41
+ it 'parses the necessary metadata from a CR3 file' do
42
+ file_path = fixtures_dir + '/CR3/Canon EOS R10 (RAW).CR3'
43
+
44
+ result = subject.call(File.open(file_path, 'rb'))
45
+ expect(result.nature).to eq(:image)
46
+ expect(result.width_px).to eq(6000)
47
+ expect(result.height_px).to eq(4000)
48
+ expect(result.orientation).to eq(:top_left)
49
+ expect(result.display_width_px).to eq(6000)
50
+ expect(result.display_height_px).to eq(4000)
51
+ expect(result.content_type).to eq('image/x-canon-cr3')
52
+ expect(result.intrinsics).not_to be_nil
53
+ expect(result.intrinsics[:atom_tree]).not_to be_nil
54
+ expect(result.intrinsics[:exif]).not_to be_nil
55
+ expect(result.intrinsics[:exif][:image_length]).to eq(result.height_px)
56
+ expect(result.intrinsics[:exif][:image_width]).to eq(result.width_px)
57
+ end
58
+ end
@@ -0,0 +1,242 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::ISOBaseMediaFileFormat::Decoder do
4
+ context 'when build_atom_tree is called' do
5
+ context 'with no io' do
6
+ it 'raises an error' do
7
+ expect { subject.build_atom_tree(0x0) }.to raise_error(/IO missing - supply a valid IO object/)
8
+ end
9
+ end
10
+
11
+ context('with a max_read smaller than the length of the IO') do
12
+ let(:io) do
13
+ # moov
14
+ # moov
15
+ input = [0x8].pack('N') + 'moov' + [0x8].pack('N') + 'moov'
16
+ StringIO.new(input)
17
+ end
18
+
19
+ it 'stops building the tree' do
20
+ expect(subject.build_atom_tree(0x0, io).length).to eq(0)
21
+ expect(io.pos).to eq(0)
22
+
23
+ expect(subject.build_atom_tree(0x8, io).length).to eq(1)
24
+ expect(io.pos).to eq(0x8)
25
+ io.seek(0)
26
+
27
+ expect(subject.build_atom_tree(0x10, io).length).to eq(2)
28
+ expect(io.pos).to eq(0x10)
29
+ end
30
+ end
31
+
32
+ context 'when parsing an unknown atom' do
33
+ let(:io) do
34
+ # foo
35
+ # |-> moov
36
+ input = [0x14].pack('N') + 'foo ' + [0x1].pack('N') + [0x8].pack('N') + 'moov'
37
+ StringIO.new(input)
38
+ end
39
+
40
+ it 'parses only the type, position and size, and skips any fields and children' do
41
+ result = subject.build_atom_tree(0xFF, io)
42
+ expect(result.length).to eq(1)
43
+ expect(io.pos).to eq(0x14)
44
+
45
+ foo_atom = result[0]
46
+ expect(foo_atom.type).to eq('foo ')
47
+ expect(foo_atom.position).to eq(0)
48
+ expect(foo_atom.size).to eq(0x14)
49
+ expect(foo_atom.fields).to eq({})
50
+ expect(foo_atom.children).to eq([])
51
+ end
52
+ end
53
+
54
+ context 'when parsing a container atom' do
55
+ let(:io) do
56
+ # moov
57
+ # |-> foo
58
+ # |-> bar
59
+ input = [0x18].pack('N') + 'moov' + [0x8].pack('N') + 'foo ' + [0x8].pack('N') + 'bar '
60
+ StringIO.new(input)
61
+ end
62
+
63
+ it 'parses type, position, size and children' do
64
+ result = subject.build_atom_tree(0xFF, io)
65
+ expect(result.length).to eq(1)
66
+ expect(io.pos).to eq(0x18)
67
+
68
+ moov_atom = result[0]
69
+ expect(moov_atom.type).to eq('moov')
70
+ expect(moov_atom.position).to eq(0)
71
+ expect(moov_atom.size).to eq(0x18)
72
+ expect(moov_atom.fields).to eq({})
73
+ expect(moov_atom.children.length).to eq(2)
74
+ end
75
+ end
76
+
77
+ context 'when parsing an empty atom' do
78
+ let(:io) do
79
+ # nmhd
80
+ # |-> foo
81
+ input = [0x18].pack('N') + 'nmhd' + [0x1].pack('c') + 'fla' + [0x2].pack('N') + [0x8].pack('N') + 'foo '
82
+ StringIO.new(input)
83
+ end
84
+
85
+ it 'parses type, position, size, version and flags, and skips any other fields or children' do
86
+ result = subject.build_atom_tree(0xFF, io)
87
+ expect(result.length).to eq(1)
88
+ expect(io.pos).to eq(0x18)
89
+
90
+ nmhd_atom = result[0]
91
+ expect(nmhd_atom.type).to eq('nmhd')
92
+ expect(nmhd_atom.position).to eq(0)
93
+ expect(nmhd_atom.size).to eq(0x18)
94
+ expect(nmhd_atom.fields).to include({
95
+ version: 1,
96
+ flags: 'fla'
97
+ })
98
+ expect(nmhd_atom.children).to eq([])
99
+ end
100
+ end
101
+
102
+ context 'when parsing a uuid atom' do
103
+ let(:usertype) { '90f7c66ec2db476b977461e796f0dd4b' }
104
+ let(:io) do
105
+ input = [0x20].pack('N') + 'uuid' + [usertype].pack('H*') + [0x8].pack('N') + 'foo '
106
+ StringIO.new(input)
107
+ end
108
+
109
+ it 'parses type, position, size and usertype, and skips any other fields or children' do
110
+ # uuid
111
+ # |-> foo
112
+ result = subject.build_atom_tree(0xFF, io)
113
+ expect(result.length).to eq(1)
114
+ expect(io.pos).to eq(0x20)
115
+
116
+ nmhd_atom = result[0]
117
+ expect(nmhd_atom.type).to eq('uuid')
118
+ expect(nmhd_atom.position).to eq(0)
119
+ expect(nmhd_atom.size).to eq(0x20)
120
+ expect(nmhd_atom.fields).to include({
121
+ usertype: usertype,
122
+ })
123
+ expect(nmhd_atom.children).to eq([])
124
+ end
125
+ end
126
+ end
127
+ end
128
+
129
+ describe FormatParser::ISOBaseMediaFileFormat::Decoder::Atom do
130
+ context 'when initialized' do
131
+ context 'without fields and/or children' do
132
+ subject { described_class.new('foo', 0, 0) }
133
+
134
+ it 'sets them as an empty array/hash' do
135
+ expect(subject.type).to eq('foo')
136
+ expect(subject.position).to eq(0)
137
+ expect(subject.size).to eq(0)
138
+ expect(subject.fields).to eq({})
139
+ expect(subject.children).to eq([])
140
+ end
141
+ end
142
+
143
+ context 'with fields and/or children' do
144
+ let(:fields) { { foo: 1, bar: 'bar' } }
145
+ let(:children) { [described_class.new('bar', 0, 0)] }
146
+
147
+ subject { described_class.new('foo', 0, 0, fields, children) }
148
+
149
+ it 'sets them correctly' do
150
+ expect(subject.type).to eq('foo')
151
+ expect(subject.position).to eq(0)
152
+ expect(subject.size).to eq(0)
153
+ expect(subject.fields).to eq(fields)
154
+ expect(subject.children).to eq(children)
155
+ end
156
+ end
157
+ end
158
+
159
+ context 'when find_first_descendent is called' do
160
+ context 'with no children' do
161
+ subject { described_class.new('root', 0, 0) }
162
+ it 'returns nil' do
163
+ expect(subject.find_first_descendent(%w[root foo])).to be_nil
164
+ end
165
+ end
166
+
167
+ context 'with no descendents of the given type(s)' do
168
+ subject do
169
+ described_class.new('root', 0, 0, nil, [
170
+ described_class.new('foo', 0, 0),
171
+ described_class.new('bar', 0, 0, nil, [
172
+ described_class.new('baz', 0, 0)
173
+ ])
174
+ ])
175
+ end
176
+
177
+ it 'returns nil' do
178
+ expect(subject.find_first_descendent(%w[root qux])).to be_nil
179
+ end
180
+ end
181
+
182
+ context 'with multiple descendents of the given type(s)' do
183
+ let(:descendent) { described_class.new('bar', 0, 0) }
184
+
185
+ subject do
186
+ described_class.new('root', 0, 0, nil, [
187
+ described_class.new('foo', 0, 0, nil, [
188
+ descendent
189
+ ]),
190
+ described_class.new('bar', 0, 0),
191
+ ])
192
+ end
193
+
194
+ it 'returns the first relevant descendent in order of appearance' do
195
+ expect(subject.find_first_descendent(%w[bar])).to be(descendent)
196
+ end
197
+ end
198
+ end
199
+
200
+ context 'when select_descendents is called' do
201
+ context 'with no children' do
202
+ subject { described_class.new('root', 0, 0) }
203
+ it 'returns an empty array' do
204
+ expect(subject.select_descendents(%w[root foo])).to eq([])
205
+ end
206
+ end
207
+
208
+ context 'with no descendents of the given type(s)' do
209
+ subject do
210
+ described_class.new('root', 0, 0, nil, [
211
+ described_class.new('foo', 0, 0),
212
+ described_class.new('bar', 0, 0, nil, [
213
+ described_class.new('baz', 0, 0)
214
+ ])
215
+ ])
216
+ end
217
+
218
+ it 'returns an empty array' do
219
+ expect(subject.select_descendents(%w[root qux])).to eq([])
220
+ end
221
+ end
222
+
223
+ context 'with multiple descendents of the given type(s)' do
224
+ let(:descendent_1) { described_class.new('bar', 0, 0) }
225
+ let(:descendent_3) { described_class.new('bar', 20, 20) }
226
+ let(:descendent_2) { described_class.new('baz', 10, 10, nil, [descendent_3]) }
227
+
228
+ subject do
229
+ described_class.new('root', 0, 0, nil, [
230
+ described_class.new('foo', 0, 0, nil, [
231
+ descendent_1
232
+ ]),
233
+ descendent_2,
234
+ ])
235
+ end
236
+
237
+ it 'returns all relevant descendents' do
238
+ expect(subject.select_descendents(%w[bar baz])).to match_array([descendent_1, descendent_2, descendent_3])
239
+ end
240
+ end
241
+ end
242
+ end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 2.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
8
8
  - Julik Tarkhanov
9
- autorequire:
9
+ autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2022-11-07 00:00:00.000000000 Z
12
+ date: 2022-11-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: exifr
@@ -195,6 +195,8 @@ files:
195
195
  - lib/parsers/arw_parser.rb
196
196
  - lib/parsers/bmp_parser.rb
197
197
  - lib/parsers/cr2_parser.rb
198
+ - lib/parsers/cr3_parser.rb
199
+ - lib/parsers/cr3_parser/decoder.rb
198
200
  - lib/parsers/dpx_parser.rb
199
201
  - lib/parsers/dpx_parser/dpx_structs.rb
200
202
  - lib/parsers/exif_parser.rb
@@ -202,6 +204,7 @@ files:
202
204
  - lib/parsers/flac_parser.rb
203
205
  - lib/parsers/gif_parser.rb
204
206
  - lib/parsers/heif_parser.rb
207
+ - lib/parsers/iso_base_media_file_format/decoder.rb
205
208
  - lib/parsers/jpeg_parser.rb
206
209
  - lib/parsers/m3u_parser.rb
207
210
  - lib/parsers/moov_parser.rb
@@ -243,12 +246,14 @@ files:
243
246
  - spec/parsers/arw_parser_spec.rb
244
247
  - spec/parsers/bmp_parser_spec.rb
245
248
  - spec/parsers/cr2_parser_spec.rb
249
+ - spec/parsers/cr3_parser_spec.rb
246
250
  - spec/parsers/dpx_parser_spec.rb
247
251
  - spec/parsers/exif_parser_spec.rb
248
252
  - spec/parsers/fdx_parser_spec.rb
249
253
  - spec/parsers/flac_parser_spec.rb
250
254
  - spec/parsers/gif_parser_spec.rb
251
255
  - spec/parsers/heif_parser_spec.rb
256
+ - spec/parsers/iso_base_media_file_format/decoder_spec.rb
252
257
  - spec/parsers/jpeg_parser_spec.rb
253
258
  - spec/parsers/m3u_parser_spec.rb
254
259
  - spec/parsers/moov_parser_spec.rb
@@ -273,7 +278,7 @@ licenses:
273
278
  - MIT (Hippocratic)
274
279
  metadata:
275
280
  allowed_push_host: https://rubygems.org
276
- post_install_message:
281
+ post_install_message:
277
282
  rdoc_options: []
278
283
  require_paths:
279
284
  - lib
@@ -288,8 +293,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
288
293
  - !ruby/object:Gem::Version
289
294
  version: '0'
290
295
  requirements: []
291
- rubygems_version: 3.3.17
292
- signing_key:
296
+ rubygems_version: 3.3.7
297
+ signing_key:
293
298
  specification_version: 4
294
299
  summary: A library for efficient parsing of file metadata
295
300
  test_files: []