format_parser 0.23.1 → 0.25.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f76db376646606abe6a7ccaa6f0a14efdc997ecd7fe29aff0ab3d8172857649f
4
- data.tar.gz: 07c142e7ce6aaa518285d425eb95961d18e053a6b15456d4cb569cdc70a79069
3
+ metadata.gz: 87adbfef15c2281ab6a13f151b857409f0ffad0ecc5270d9d0bbc5cebe207cdb
4
+ data.tar.gz: 332e3c4efd4ae01b3cf47c669debba7cc1aee5c264bef503720f632f6d801054
5
5
  SHA512:
6
- metadata.gz: 3e92625dbe822d3423a084174fd7bc9d4a296effe26182a91743b36995f8470edc4fa56a3a0a10ce095d4998da5dee9620b983c1f980e0418a3b4574244e848c
7
- data.tar.gz: b89df24f7b6638b3cd42b284a1792475a094e947bd995e5ccdd0c554014075f5d167a7c04fbc08c681483b0c82aa2e59ae4e4af62f99d6b158fb4b3f1096d80b
6
+ metadata.gz: 231d768d4b69b2c2f29bcb861888a7fbb0f4242eec5c9313d6428c9053b4fb4e7d20b8615d731a695c278ac59d3bf07b4977d217892aa1fce8fa7adc9d415efa
7
+ data.tar.gz: 732fae92e71f6a25fa98b7d88fa70b82404f00c234a6debd641e20ad8ffd9e45c78270f20adb8ad5593cd815e128a2a409ec639351c985b79763d4bfd821fec9
@@ -1,3 +1,22 @@
1
+ ## 0.25.1
2
+ * MOV: Fix error "negative length"
3
+ * MOV: Fix reading dimensions in multi-track files
4
+ * MP3: Fix parse of the Xing header to not raise errors
5
+
6
+ ## 0.25.0
7
+ * MP3: add support for id3 v2.4.x
8
+ * JPEG: Update gem exifr to 1.3.8 to fix a bug
9
+
10
+ ## 0.24.2
11
+ * Update gem id3tag to 0.14.0 to fix MP3 issues
12
+
13
+ ## 0.24.1
14
+ * Fix MP3 frames reading to jump correctly to the next bytes
15
+
16
+ ## 0.24.0
17
+ * The TIFF parser will now return :arw as format for Sony ARW files instead of :tif so that the caller can decide whether it
18
+ wants to deal with RAW processing or not
19
+
1
20
  ## 0.23.1
2
21
  * Updated gem exifr to fix problems related to jpeg files from Olympus microscopes, which often have bad thumbnail data
3
22
 
@@ -31,8 +31,8 @@ Gem::Specification.new do |spec|
31
31
  spec.require_paths = ['lib']
32
32
 
33
33
  spec.add_dependency 'ks', '~> 0.0'
34
- spec.add_dependency 'exifr', '~> 1', '>= 1.3.7'
35
- spec.add_dependency 'id3tag', '~> 0.13'
34
+ spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
35
+ spec.add_dependency 'id3tag', '~> 0.14'
36
36
  spec.add_dependency 'faraday', '~> 0.13'
37
37
  spec.add_dependency 'measurometer', '~> 1'
38
38
 
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.23.1'
2
+ VERSION = '0.25.1'
3
3
  end
@@ -38,14 +38,8 @@ class FormatParser::MOOVParser
38
38
  ftyp_atom = decoder.find_first_atom_by_path(atom_tree, 'ftyp')
39
39
  file_type = ftyp_atom.field_value(:major_brand)
40
40
 
41
- width = nil
42
- height = nil
43
-
44
41
  # Try to find the width and height in the tkhd
45
- if tkhd = decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
46
- width = tkhd.field_value(:track_width).first
47
- height = tkhd.field_value(:track_height).first
48
- end
42
+ width, height = parse_dimensions(decoder, atom_tree)
49
43
 
50
44
  # Try to find the "topmost" duration (respecting edits)
51
45
  if mdhd = decoder.find_first_atom_by_path(atom_tree, 'moov', 'mvhd')
@@ -78,6 +72,31 @@ class FormatParser::MOOVParser
78
72
  FTYP_MAP.fetch(file_type.downcase, :mov)
79
73
  end
80
74
 
75
+ # The dimensions are located in the tkhd atom, but in some files it is necessary
76
+ # to read them from the tkhd under the video track, because the file can have other
77
+ # tracks, such as audio, which do not have dimensions.
78
+ # More details in https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-DontLinkElementID_147
79
+ #
80
+ # Returns [width, height] if the dimension is found
81
+ # Returns [nil, nil] if the dimension is not found
82
+ def parse_dimensions(decoder, atom_tree)
83
+ video_trak_atom = decoder.find_video_trak_atom(atom_tree)
84
+
85
+ tkhd = begin
86
+ if video_trak_atom
87
+ decoder.find_first_atom_by_path([video_trak_atom], 'trak', 'tkhd')
88
+ else
89
+ decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
90
+ end
91
+ end
92
+
93
+ if tkhd
94
+ [tkhd.field_value(:track_width).first, tkhd.field_value(:track_height).first]
95
+ else
96
+ [nil, nil]
97
+ end
98
+ end
99
+
81
100
  # An MPEG4/MOV/M4A will start with the "ftyp" atom. The atom must have a length
82
101
  # of at least 8 (to accommodate the atom size and the atom type itself) plus the major
83
102
  # and minor version fields. If we cannot find it we can be certain this is not our file.
@@ -1,6 +1,7 @@
1
1
  # Handles decoding of MOV/MPEG4 atoms/boxes in a stream. Will recursively
2
2
  # read atoms and parse their data fields if applicable. Also contains
3
3
  # a few utility functions for finding atoms in a list etc.
4
+ # To know more about Atoms: https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html
4
5
  class FormatParser::MOOVParser::Decoder
5
6
  include FormatParser::IOUtils
6
7
 
@@ -47,6 +48,34 @@ class FormatParser::MOOVParser::Decoder
47
48
  find_first_atom_by_path(requisite.children || [], *atom_types)
48
49
  end
49
50
 
51
+ def find_atoms_by_path(atoms, atom_types)
52
+ type_to_find = atom_types.shift
53
+ requisites = atoms.select { |e| e.atom_type == type_to_find }
54
+
55
+ # Return if we found our match
56
+ return requisites if atom_types.empty?
57
+
58
+ # Return nil if we didn't find the match at this nesting level
59
+ return unless requisites
60
+
61
+ # ...otherwise drill further down
62
+ find_atoms_by_path(requisites.flat_map(&:children).compact || [], atom_types)
63
+ end
64
+
65
+ # A file can have multiple tracks. To identify the type it is necessary to check
66
+ # the fields `component_type` and `component_subtype` in the hdlr atom under the trak atom
67
+ # More details in https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-DontLinkElementID_147
68
+ def find_video_trak_atom(atoms)
69
+ trak_atoms = find_atoms_by_path(atoms, ['moov', 'trak'])
70
+
71
+ return [] if trak_atoms.empty?
72
+
73
+ trak_atoms.find do |trak_atom|
74
+ hdlr_atom = find_first_atom_by_path([trak_atom], 'trak', 'mdia', 'hdlr')
75
+ hdlr_atom.atom_fields[:component_type] == 'mhlr' && hdlr_atom.atom_fields[:component_subtype] == 'vide'
76
+ end
77
+ end
78
+
50
79
  def parse_ftyp_atom(io, atom_size)
51
80
  # Subtract 8 for the atom_size+atom_type,
52
81
  # and 8 once more for the major_brand and minor_version. The remaining
@@ -194,6 +223,8 @@ class FormatParser::MOOVParser::Decoder
194
223
  end
195
224
 
196
225
  def parse_meta_atom(io, atom_size)
226
+ return if atom_size == 0 # this atom can be empty
227
+
197
228
  parse_hdlr_atom(io, atom_size)
198
229
  end
199
230
 
@@ -20,13 +20,14 @@ class FormatParser::MP3Parser
20
20
 
21
21
  # We limit the number of MPEG frames we scan
22
22
  # to obtain our duration estimation
23
- MAX_FRAMES_TO_SCAN = 128
23
+ MAX_FRAMES_TO_SCAN = 500
24
24
 
25
25
  # Default frame size for mp3
26
26
  SAMPLES_PER_FRAME = 1152
27
27
 
28
28
  # For some edge cases
29
29
  ZIP_LOCAL_ENTRY_SIGNATURE = "PK\x03\x04\x14\x00".b
30
+ PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
30
31
 
31
32
  # Wraps the Tag object returned by ID3Tag in such
32
33
  # a way that a usable JSON representation gets
@@ -60,8 +61,12 @@ class FormatParser::MP3Parser
60
61
  # To avoid having that happen, we check for the PKZIP signature -
61
62
  # local entry header signature - at the very start of the file.
62
63
  # If the file is too small safe_read will fail too and the parser
63
- # will terminate here.
64
- return if safe_read(io, 6) == ZIP_LOCAL_ENTRY_SIGNATURE
64
+ # will terminate here. Same with PNGs. In the future
65
+ # we should implement "confidence" for MP3 as of all our formats
66
+ # it is by far the most lax.
67
+ header = safe_read(io, 8)
68
+ return if header.start_with?(ZIP_LOCAL_ENTRY_SIGNATURE)
69
+ return if header.start_with?(PNG_HEADER_BYTES)
65
70
 
66
71
  # Read all the ID3 tags (or at least attempt to)
67
72
  io.seek(0)
@@ -131,27 +136,28 @@ class FormatParser::MP3Parser
131
136
  # if you have a minute. https://pypi.python.org/pypi/tinytag
132
137
  def parse_mpeg_frames(io)
133
138
  mpeg_frames = []
139
+ bytes_to_read = 4
134
140
 
135
141
  MAX_FRAMES_TO_SCAN.times do |frame_i|
136
142
  # Read through until we can latch onto the 11 sync bits. Read in 4-byte
137
143
  # increments to save on read() calls
138
- data = io.read(4)
144
+ data = io.read(bytes_to_read)
139
145
 
140
146
  # If we are at EOF - stop iterating
141
- break unless data && data.bytesize == 4
147
+ break unless data && data.bytesize == bytes_to_read
142
148
 
143
149
  # Look for the sync pattern. It can be either the last byte being 0xFF,
144
150
  # or any of the 2 bytes in sequence being 0xFF and > 0xF0.
145
151
  four_bytes = data.unpack('C4')
146
152
  seek_jmp = sync_bytes_offset_in_4_byte_seq(four_bytes)
147
153
  if seek_jmp > 0
148
- io.seek(io.pos + seek_jmp)
154
+ io.seek(io.pos - bytes_to_read + seek_jmp)
149
155
  next
150
156
  end
151
157
 
152
158
  # Once we are past that stage we have latched onto a sync frame header
153
159
  sync, conf, bitrate_freq, rest = four_bytes
154
- frame_detail = parse_mpeg_frame_header(io.pos - 4, sync, conf, bitrate_freq, rest)
160
+ frame_detail = parse_mpeg_frame_header(io.pos - bytes_to_read, sync, conf, bitrate_freq, rest)
155
161
  mpeg_frames << frame_detail
156
162
 
157
163
  # There might be a xing header in the first frame that contains
@@ -166,7 +172,7 @@ class FormatParser::MP3Parser
166
172
  end
167
173
  end
168
174
  if frame_detail.frame_length > 1 # jump over current frame body
169
- io.seek(io.pos + frame_detail.frame_length - 4)
175
+ io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
170
176
  end
171
177
  end
172
178
  [nil, mpeg_frames]
@@ -243,16 +249,16 @@ class FormatParser::MP3Parser
243
249
  io.seek(xing_offset + 4) # Include the length of "Xing" itself
244
250
 
245
251
  # https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
246
- header_flags, _ = io.read(4).unpack('s>s>')
252
+ header_flags, _ = io.read(4).unpack('i>')
247
253
  frames = byte_count = toc = vbr_scale = nil
248
254
 
249
- frames = io.read(4).unpack('N1').first if header_flags & 1 # FRAMES FLAG
255
+ frames = io.read(4).unpack('N1').first if header_flags & 1 != 0 # FRAMES FLAG
250
256
 
251
- byte_count = io.read(4).unpack('N1').first if header_flags & 2 # BYTES FLAG
257
+ byte_count = io.read(4).unpack('N1').first if header_flags & 2 != 0 # BYTES FLAG
252
258
 
253
- toc = io.read(100).unpack('C100') if header_flags & 4 # TOC FLAG
259
+ toc = io.read(100).unpack('C100') if header_flags & 4 != 0 # TOC FLAG
254
260
 
255
- vbr_scale = io.read(4).unpack('N1').first if header_flags & 8 # VBR SCALE FLAG
261
+ vbr_scale = io.read(4).unpack('N1').first if header_flags & 8 != 0 # VBR SCALE FLAG
256
262
 
257
263
  VBRHeader.new(frames: frames, byte_count: byte_count, toc_entries: toc, vbr_scale: vbr_scale)
258
264
  end
@@ -1,6 +1,8 @@
1
1
  module FormatParser::MP3Parser::ID3Extraction
2
2
  ID3V1_TAG_SIZE_BYTES = 128
3
- ID3V2_TAG_VERSIONS = ["\x43\x00".b, "\x03\x00".b, "\x02\x00".b]
3
+ # it supports 2.4.x, 2.3.x, 2.2.x which are supported by the gem id3tag
4
+ # see https://id3.org/Developer%20Information for more details of each version
5
+ ID3V2_MINOR_TAG_VERSIONS = [2, 3, 4]
4
6
  MAX_SIZE_FOR_ID3V2 = 1 * 1024 * 1024
5
7
 
6
8
  extend FormatParser::IOUtils
@@ -22,7 +24,7 @@ module FormatParser::MP3Parser::ID3Extraction
22
24
  io.seek(0) # Only support header ID3v2
23
25
  header = parse_id3_v2_header(io)
24
26
  return unless header[:tag] == 'ID3' && header[:size] > 0
25
- return unless ID3V2_TAG_VERSIONS.include?(header[:version])
27
+ return unless ID3V2_MINOR_TAG_VERSIONS.include?(header[:version].unpack('C').first)
26
28
 
27
29
  id3_tag_size = io.pos + header[:size]
28
30
 
@@ -26,7 +26,7 @@ class FormatParser::TIFFParser
26
26
  h = exif_data.height || exif_data.pixel_y_dimension
27
27
 
28
28
  FormatParser::Image.new(
29
- format: :tif,
29
+ format: arw?(exif_data) ? :arw : :tif, # Specify format as arw for Sony ARW format images, else tif
30
30
  width_px: w,
31
31
  height_px: h,
32
32
  display_width_px: exif_data.rotated? ? h : w,
@@ -43,5 +43,11 @@ class FormatParser::TIFFParser
43
43
  safe_read(io, 2) == 'CR'
44
44
  end
45
45
 
46
+ # Similar to how exiftool determines the image type as ARW, we are implementing a check here
47
+ # https://github.com/exiftool/exiftool/blob/e969456372fbaf4b980fea8bb094d71033ac8bf7/lib/Image/ExifTool/Exif.pm#L929
48
+ def arw?(exif_data)
49
+ exif_data.compression == 6 && exif_data.new_subfile_type == 1 && exif_data.make == 'SONY'
50
+ end
51
+
46
52
  FormatParser.register_parser new, natures: :image, formats: :tif
47
53
  end
@@ -108,4 +108,24 @@ describe FormatParser::MOOVParser do
108
108
  it 'provides filename hints' do
109
109
  expect(subject).to be_likely_match('file.m4v')
110
110
  end
111
+
112
+ it 'reads correctly the video dimensions' do
113
+ mov_path = fixtures_dir + '/MOOV/MOV/Test_Dimensions.mov'
114
+
115
+ result = subject.call(File.open(mov_path, 'rb'))
116
+
117
+ expect(result).not_to be_nil
118
+ expect(result.nature).to eq(:video)
119
+ expect(result.format).to eq(:mov)
120
+ expect(result.width_px).to eq(640)
121
+ expect(result.height_px).to eq(360)
122
+ end
123
+
124
+ it 'does not raise error when a meta atom has size 0' do
125
+ mov_path = fixtures_dir + '/MOOV/MOV/Test_Meta_Atom_With_Size_Zero.mov'
126
+
127
+ result = subject.call(File.open(mov_path, 'rb'))
128
+ expect(result).not_to be_nil
129
+ expect(result.format).to eq(:mov)
130
+ end
111
131
  end
@@ -15,6 +15,26 @@ describe FormatParser::MP3Parser do
15
15
  expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
16
16
  end
17
17
 
18
+ it 'reads the Xing header without raising errors' do
19
+ fpath = fixtures_dir + '/MP3/test_xing_header.mp3'
20
+ parsed = subject.call(File.open(fpath, 'rb'))
21
+
22
+ expect(parsed).not_to be_nil
23
+
24
+ expect(parsed.nature).to eq(:audio)
25
+ expect(parsed.format).to eq(:mp3)
26
+ expect(parsed.num_audio_channels).to eq(2)
27
+ expect(parsed.audio_sample_rate_hz).to eq(48000)
28
+ expect(parsed.intrinsics).not_to be_nil
29
+ expect(parsed.media_duration_seconds).to be_within(0.1).of(0.0342)
30
+ end
31
+
32
+ it 'does not misdetect a PNG' do
33
+ fpath = fixtures_dir + '/PNG/anim.png'
34
+ parsed = subject.call(File.open(fpath, 'rb'))
35
+ expect(parsed).to be_nil
36
+ end
37
+
18
38
  describe 'title/artist/album attributes' do
19
39
  let(:parsed) { subject.call(File.open(fpath, 'rb')) }
20
40
 
@@ -67,7 +87,7 @@ describe FormatParser::MP3Parser do
67
87
 
68
88
  large_syncsfe_size = [ID3Tag::SynchsafeInteger.encode(more_bytes_than_permitted)].pack('N')
69
89
  prepped = StringIO.new(
70
- 'ID3' + "\x43\x00".b + "\x00".b + large_syncsfe_size + gunk
90
+ 'ID3' + "\x03\x00".b + "\x00".b + large_syncsfe_size + gunk
71
91
  )
72
92
 
73
93
  expect(ID3Tag).not_to receive(:read)
@@ -90,6 +110,14 @@ describe FormatParser::MP3Parser do
90
110
  expect(parsed.title).to eq('test')
91
111
  end
92
112
 
113
+ it 'reads the mpeg frames correctly' do
114
+ fpath = fixtures_dir + '/MP3/test_read_frames.mp3'
115
+
116
+ parsed = subject.call(File.open(fpath, 'rb'))
117
+
118
+ expect(parsed.audio_sample_rate_hz). to eq(48000)
119
+ end
120
+
93
121
  it 'parses the Cassy MP3' do
94
122
  fpath = fixtures_dir + '/MP3/Cassy.mp3'
95
123
  parsed = subject.call(File.open(fpath, 'rb'))
@@ -130,6 +158,14 @@ describe FormatParser::MP3Parser do
130
158
  }.to raise_error(FormatParser::IOUtils::InvalidRead)
131
159
  end
132
160
 
161
+ it 'supports id3 v2.4.x' do
162
+ fpath = fixtures_dir + '/MP3/id3v24.mp3'
163
+
164
+ parsed = subject.call(File.open(fpath, 'rb'))
165
+
166
+ expect(parsed.artist). to eq('wetransfer')
167
+ end
168
+
133
169
  describe '#as_json' do
134
170
  it 'converts all hash keys to string when stringify_keys: true' do
135
171
  fpath = fixtures_dir + '/MP3/Cassy.mp3'
@@ -47,12 +47,15 @@ describe FormatParser::TIFFParser do
47
47
  expect(parsed.intrinsics[:exif]).not_to be_nil
48
48
  end
49
49
 
50
- it 'correctly extracts dimensions for a Sony ARW fixture' do
50
+ it 'parses Sony ARW fixture as arw format file' do
51
51
  arw_path = fixtures_dir + '/ARW/RAW_SONY_ILCE-7RM2.ARW'
52
52
 
53
53
  parsed = subject.call(File.open(arw_path, 'rb'))
54
54
 
55
55
  expect(parsed).not_to be_nil
56
+ expect(parsed.nature).to eq(:image)
57
+ expect(parsed.format).to eq(:arw)
58
+
56
59
  expect(parsed.width_px).to eq(7952)
57
60
  expect(parsed.height_px).to eq(5304)
58
61
  expect(parsed.intrinsics[:exif]).not_to be_nil
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.23.1
4
+ version: 0.25.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2020-09-14 00:00:00.000000000 Z
12
+ date: 2020-10-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -34,7 +34,7 @@ dependencies:
34
34
  version: '1'
35
35
  - - ">="
36
36
  - !ruby/object:Gem::Version
37
- version: 1.3.7
37
+ version: 1.3.8
38
38
  type: :runtime
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -44,21 +44,21 @@ dependencies:
44
44
  version: '1'
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: 1.3.7
47
+ version: 1.3.8
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: id3tag
50
50
  requirement: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0.13'
54
+ version: '0.14'
55
55
  type: :runtime
56
56
  prerelease: false
57
57
  version_requirements: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0.13'
61
+ version: '0.14'
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: faraday
64
64
  requirement: !ruby/object:Gem::Requirement
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
292
292
  - !ruby/object:Gem::Version
293
293
  version: '0'
294
294
  requirements: []
295
- rubygems_version: 3.0.3
295
+ rubygems_version: 3.1.2
296
296
  signing_key:
297
297
  specification_version: 4
298
298
  summary: A library for efficient parsing of file metadata