format_parser 0.24.0 → 0.25.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cef6e8b3f4c3914ef022e7ff0e92d38357670315c0804efe8a2967f7f4228f53
4
- data.tar.gz: '09a817b4fc688a3695800f4602a5901613b591d7a30ba26b86988720da7e13a9'
3
+ metadata.gz: 290b2369d2fe089202a76ab9d7b659f8de58fa44b9a40718130105ae7026036a
4
+ data.tar.gz: 53e983a8639cc42377ab50d7364b5099cf1f3308f8108f92b6194040546ea2e8
5
5
  SHA512:
6
- metadata.gz: 979804c2a7381d09e1f4952396321e185a917587d44409b1f7d45be0e05b466601eb7281e3d463785c6c18d79ba48ff3858bcd60c0f6791e2d394c4fa40ed684
7
- data.tar.gz: a58cd8cab20fa8b8b7886a36479bd03fb6f5a4afbf9435d09318da502de50f6f257e45b012a56c79b4309dbaaf96feedb6b2b12016979bffa1336c528a591e3b
6
+ metadata.gz: 5f58620ab165b1c47a8a18fe82081d48cb0b285b4f9638146c31d1bb2f839247b36750a7f85be5a9ea14b8db3fc2ca175b86ab4f8b29f87bd1ea10caed57746c
7
+ data.tar.gz: b9e87723a7cc1d5ecf04a23c28cbfc433c120337275aa51d76396a9a1371bb1b683b22c18ab4e84d8870a76048e26f7c5d7f90b4b5f9b868bf5a0dcb6771640c
@@ -1,3 +1,18 @@
1
+ ## 0.25.1
2
+ * MOV: Fix error "negative length"
3
+ * MOV: Fix reading dimensions in multi-track files
4
+ * MP3: Fix parse of the Xing header to not raise errors
5
+
6
+ ## 0.25.0
7
+ * MP3: add support for id3 v2.4.x
8
+ * JPEG: Update gem exifr to 1.3.8 to fix a bug
9
+
10
+ ## 0.24.2
11
+ * Update gem id3tag to 0.14.0 to fix MP3 issues
12
+
13
+ ## 0.24.1
14
+ * Fix MP3 frames reading to jump correctly to the next bytes
15
+
1
16
  ## 0.24.0
2
17
  * The TIFF parser will now return :arw as format for Sony ARW files instead of :tif so that the caller can decide whether it
3
18
  wants to deal with RAW processing or not
@@ -31,8 +31,8 @@ Gem::Specification.new do |spec|
31
31
  spec.require_paths = ['lib']
32
32
 
33
33
  spec.add_dependency 'ks', '~> 0.0'
34
- spec.add_dependency 'exifr', '~> 1', '>= 1.3.7'
35
- spec.add_dependency 'id3tag', '~> 0.13'
34
+ spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
35
+ spec.add_dependency 'id3tag', '~> 0.14'
36
36
  spec.add_dependency 'faraday', '~> 0.13'
37
37
  spec.add_dependency 'measurometer', '~> 1'
38
38
 
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.24.0'
2
+ VERSION = '0.25.2'
3
3
  end
@@ -38,14 +38,8 @@ class FormatParser::MOOVParser
38
38
  ftyp_atom = decoder.find_first_atom_by_path(atom_tree, 'ftyp')
39
39
  file_type = ftyp_atom.field_value(:major_brand)
40
40
 
41
- width = nil
42
- height = nil
43
-
44
41
  # Try to find the width and height in the tkhd
45
- if tkhd = decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
46
- width = tkhd.field_value(:track_width).first
47
- height = tkhd.field_value(:track_height).first
48
- end
42
+ width, height = parse_dimensions(decoder, atom_tree)
49
43
 
50
44
  # Try to find the "topmost" duration (respecting edits)
51
45
  if mdhd = decoder.find_first_atom_by_path(atom_tree, 'moov', 'mvhd')
@@ -78,6 +72,31 @@ class FormatParser::MOOVParser
78
72
  FTYP_MAP.fetch(file_type.downcase, :mov)
79
73
  end
80
74
 
75
# Width and height are stored in a tkhd atom. A file may carry several tracks
# (audio, for instance) whose tkhd has no usable dimensions, so the tkhd that
# sits below the video trak is preferred. When the decoder reports no video
# trak, the first tkhd found under moov/trak is used instead.
# More details in https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-DontLinkElementID_147
#
# decoder   - object responding to find_video_trak_atom and find_first_atom_by_path
# atom_tree - the list of top-level atoms handed to the decoder lookups
#
# Returns [width, height] if a tkhd atom is found
# Returns [nil, nil] if no tkhd atom is found
def parse_dimensions(decoder, atom_tree)
  video_trak = decoder.find_video_trak_atom(atom_tree)

  track_header =
    if video_trak
      decoder.find_first_atom_by_path([video_trak], 'trak', 'tkhd')
    else
      decoder.find_first_atom_by_path(atom_tree, 'moov', 'trak', 'tkhd')
    end

  return [nil, nil] unless track_header

  # Each field value is a list; only its first element is used as the dimension.
  [:track_width, :track_height].map { |field| track_header.field_value(field).first }
end
99
+
81
100
  # An MPEG4/MOV/M4A will start with the "ftyp" atom. The atom must have a length
82
101
  # of at least 8 (to accommodate the atom size and the atom type itself) plus the major
83
102
  # and minor version fields. If we cannot find it we can be certain this is not our file.
@@ -1,6 +1,7 @@
1
1
  # Handles decoding of MOV/MPEG4 atoms/boxes in a stream. Will recursively
2
2
  # read atoms and parse their data fields if applicable. Also contains
3
3
  # a few utility functions for finding atoms in a list etc.
4
+ # To know more about Atoms: https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html
4
5
  class FormatParser::MOOVParser::Decoder
5
6
  include FormatParser::IOUtils
6
7
 
@@ -47,6 +48,34 @@ class FormatParser::MOOVParser::Decoder
47
48
  find_first_atom_by_path(requisite.children || [], *atom_types)
48
49
  end
49
50
 
51
# Collects every atom that matches a path of nested atom types, e.g.
# ['moov', 'trak'] returns all trak atoms found below any moov atom in +atoms+.
#
# atoms      - list of atoms (objects responding to atom_type and children)
# atom_types - list of atom type names, outermost first. It is not modified.
#
# Returns an Array of the matching atoms, which is empty when nothing matches
# at any nesting level. It never returns nil.
def find_atoms_by_path(atoms, atom_types)
  # Destructure instead of shift so the caller's array stays untouched
  type_to_find, *remaining_types = atom_types
  requisites = atoms.select { |e| e.atom_type == type_to_find }

  # Return if we found our match
  return requisites if remaining_types.empty?

  # Nothing matched at this nesting level, so nothing can match further down
  return [] if requisites.empty?

  # ...otherwise drill further down. Atoms without children contribute nil,
  # which compact removes.
  find_atoms_by_path(requisites.flat_map(&:children).compact, remaining_types)
end
64
+
65
# A file can have multiple tracks. To identify the type of a track it is necessary
# to check the `component_subtype` field in the hdlr atom under the trak atom.
# More details in https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-DontLinkElementID_147
#
# atoms - list of top-level atoms to search below moov/trak
#
# Returns the first trak atom whose mdia/hdlr atom has component type 'mhlr'
# and component subtype 'vide', or nil when no trak qualifies.
def find_video_trak_atom(atoms)
  trak_atoms = find_atoms_by_path(atoms, ['moov', 'trak'])

  trak_atoms.find do |trak_atom|
    hdlr_atom = find_first_atom_by_path([trak_atom], 'trak', 'mdia', 'hdlr')

    # A trak without a mdia/hdlr atom cannot be identified as video. Skip it
    # instead of calling atom_fields on nil.
    next false unless hdlr_atom

    # NOTE(review): ISO BMFF (MP4) files reportedly leave the component type
    # zeroed rather than 'mhlr', in which case no trak would match here.
    # Confirm whether that is intended.
    fields = hdlr_atom.atom_fields
    fields[:component_type] == 'mhlr' && fields[:component_subtype] == 'vide'
  end
end
78
+
50
79
  def parse_ftyp_atom(io, atom_size)
51
80
  # Subtract 8 for the atom_size+atom_type,
52
81
  # and 8 once more for the major_brand and minor_version. The remaining
@@ -194,6 +223,8 @@ class FormatParser::MOOVParser::Decoder
194
223
  end
195
224
 
196
225
  def parse_meta_atom(io, atom_size)
226
+ return if atom_size == 0 # this atom can be empty
227
+
197
228
  parse_hdlr_atom(io, atom_size)
198
229
  end
199
230
 
@@ -20,13 +20,14 @@ class FormatParser::MP3Parser
20
20
 
21
21
  # We limit the number of MPEG frames we scan
22
22
  # to obtain our duration estimation
23
- MAX_FRAMES_TO_SCAN = 128
23
+ MAX_FRAMES_TO_SCAN = 500
24
24
 
25
25
  # Default frame size for mp3
26
26
  SAMPLES_PER_FRAME = 1152
27
27
 
28
28
  # For some edge cases
29
29
  ZIP_LOCAL_ENTRY_SIGNATURE = "PK\x03\x04\x14\x00".b
30
+ PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
30
31
 
31
32
  # Wraps the Tag object returned by ID3Tag in such
32
33
  # a way that a usable JSON representation gets
@@ -60,8 +61,12 @@ class FormatParser::MP3Parser
60
61
  # To avoid having that happen, we check for the PKZIP signature -
61
62
  # local entry header signature - at the very start of the file.
62
63
  # If the file is too small safe_read will fail too and the parser
63
- # will terminate here.
64
- return if safe_read(io, 6) == ZIP_LOCAL_ENTRY_SIGNATURE
64
+ # will terminate here. Same with PNGs. In the future
65
+ # we should implement "confidence" for MP3 as of all our formats
66
+ # it is by far the most lax.
67
+ header = safe_read(io, 8)
68
+ return if header.start_with?(ZIP_LOCAL_ENTRY_SIGNATURE)
69
+ return if header.start_with?(PNG_HEADER_BYTES)
65
70
 
66
71
  # Read all the ID3 tags (or at least attempt to)
67
72
  io.seek(0)
@@ -131,27 +136,28 @@ class FormatParser::MP3Parser
131
136
  # if you have a minute. https://pypi.python.org/pypi/tinytag
132
137
  def parse_mpeg_frames(io)
133
138
  mpeg_frames = []
139
+ bytes_to_read = 4
134
140
 
135
141
  MAX_FRAMES_TO_SCAN.times do |frame_i|
136
142
  # Read through until we can latch onto the 11 sync bits. Read in 4-byte
137
143
  # increments to save on read() calls
138
- data = io.read(4)
144
+ data = io.read(bytes_to_read)
139
145
 
140
146
  # If we are at EOF - stop iterating
141
- break unless data && data.bytesize == 4
147
+ break unless data && data.bytesize == bytes_to_read
142
148
 
143
149
  # Look for the sync pattern. It can be either the last byte being 0xFF,
144
150
  # or any of the 2 bytes in sequence being 0xFF and > 0xF0.
145
151
  four_bytes = data.unpack('C4')
146
152
  seek_jmp = sync_bytes_offset_in_4_byte_seq(four_bytes)
147
153
  if seek_jmp > 0
148
- io.seek(io.pos + seek_jmp)
154
+ io.seek(io.pos - bytes_to_read + seek_jmp)
149
155
  next
150
156
  end
151
157
 
152
158
  # Once we are past that stage we have latched onto a sync frame header
153
159
  sync, conf, bitrate_freq, rest = four_bytes
154
- frame_detail = parse_mpeg_frame_header(io.pos - 4, sync, conf, bitrate_freq, rest)
160
+ frame_detail = parse_mpeg_frame_header(io.pos - bytes_to_read, sync, conf, bitrate_freq, rest)
155
161
  mpeg_frames << frame_detail
156
162
 
157
163
  # There might be a xing header in the first frame that contains
@@ -166,7 +172,7 @@ class FormatParser::MP3Parser
166
172
  end
167
173
  end
168
174
  if frame_detail.frame_length > 1 # jump over current frame body
169
- io.seek(io.pos + frame_detail.frame_length - 4)
175
+ io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
170
176
  end
171
177
  end
172
178
  [nil, mpeg_frames]
@@ -243,16 +249,16 @@ class FormatParser::MP3Parser
243
249
  io.seek(xing_offset + 4) # Include the length of "Xing" itself
244
250
 
245
251
  # https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
246
- header_flags, _ = io.read(4).unpack('s>s>')
252
+ header_flags, _ = io.read(4).unpack('i>')
247
253
  frames = byte_count = toc = vbr_scale = nil
248
254
 
249
- frames = io.read(4).unpack('N1').first if header_flags & 1 # FRAMES FLAG
255
+ frames = io.read(4).unpack('N1').first if header_flags & 1 != 0 # FRAMES FLAG
250
256
 
251
- byte_count = io.read(4).unpack('N1').first if header_flags & 2 # BYTES FLAG
257
+ byte_count = io.read(4).unpack('N1').first if header_flags & 2 != 0 # BYTES FLAG
252
258
 
253
- toc = io.read(100).unpack('C100') if header_flags & 4 # TOC FLAG
259
+ toc = io.read(100).unpack('C100') if header_flags & 4 != 0 # TOC FLAG
254
260
 
255
- vbr_scale = io.read(4).unpack('N1').first if header_flags & 8 # VBR SCALE FLAG
261
+ vbr_scale = io.read(4).unpack('N1').first if header_flags & 8 != 0 # VBR SCALE FLAG
256
262
 
257
263
  VBRHeader.new(frames: frames, byte_count: byte_count, toc_entries: toc, vbr_scale: vbr_scale)
258
264
  end
@@ -1,6 +1,8 @@
1
1
  module FormatParser::MP3Parser::ID3Extraction
2
2
  ID3V1_TAG_SIZE_BYTES = 128
3
- ID3V2_TAG_VERSIONS = ["\x43\x00".b, "\x03\x00".b, "\x02\x00".b]
3
+ # it supports 2.4.x, 2.3.x, 2.2.x which are supported by the gem id3tag
4
+ # see https://id3.org/Developer%20Information for more details of each version
5
+ ID3V2_MINOR_TAG_VERSIONS = [2, 3, 4]
4
6
  MAX_SIZE_FOR_ID3V2 = 1 * 1024 * 1024
5
7
 
6
8
  extend FormatParser::IOUtils
@@ -22,7 +24,7 @@ module FormatParser::MP3Parser::ID3Extraction
22
24
  io.seek(0) # Only support header ID3v2
23
25
  header = parse_id3_v2_header(io)
24
26
  return unless header[:tag] == 'ID3' && header[:size] > 0
25
- return unless ID3V2_TAG_VERSIONS.include?(header[:version])
27
+ return unless ID3V2_MINOR_TAG_VERSIONS.include?(header[:version].unpack('C').first)
26
28
 
27
29
  id3_tag_size = io.pos + header[:size]
28
30
 
@@ -108,4 +108,24 @@ describe FormatParser::MOOVParser do
108
108
  it 'provides filename hints' do
109
109
  expect(subject).to be_likely_match('file.m4v')
110
110
  end
111
+
112
+ it 'reads correctly the video dimensions' do
113
+ mov_path = fixtures_dir + '/MOOV/MOV/Test_Dimensions.mov'
114
+
115
+ result = subject.call(File.open(mov_path, 'rb'))
116
+
117
+ expect(result).not_to be_nil
118
+ expect(result.nature).to eq(:video)
119
+ expect(result.format).to eq(:mov)
120
+ expect(result.width_px).to eq(640)
121
+ expect(result.height_px).to eq(360)
122
+ end
123
+
124
+ it 'does not raise error when a meta atom has size 0' do
125
+ mov_path = fixtures_dir + '/MOOV/MOV/Test_Meta_Atom_With_Size_Zero.mov'
126
+
127
+ result = subject.call(File.open(mov_path, 'rb'))
128
+ expect(result).not_to be_nil
129
+ expect(result.format).to eq(:mov)
130
+ end
111
131
  end
@@ -15,6 +15,26 @@ describe FormatParser::MP3Parser do
15
15
  expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
16
16
  end
17
17
 
18
+ it 'reads the Xing header without raising errors' do
19
+ fpath = fixtures_dir + '/MP3/test_xing_header.mp3'
20
+ parsed = subject.call(File.open(fpath, 'rb'))
21
+
22
+ expect(parsed).not_to be_nil
23
+
24
+ expect(parsed.nature).to eq(:audio)
25
+ expect(parsed.format).to eq(:mp3)
26
+ expect(parsed.num_audio_channels).to eq(2)
27
+ expect(parsed.audio_sample_rate_hz).to eq(48000)
28
+ expect(parsed.intrinsics).not_to be_nil
29
+ expect(parsed.media_duration_seconds).to be_within(0.1).of(0.0342)
30
+ end
31
+
32
+ it 'does not misdetect a PNG' do
33
+ fpath = fixtures_dir + '/PNG/anim.png'
34
+ parsed = subject.call(File.open(fpath, 'rb'))
35
+ expect(parsed).to be_nil
36
+ end
37
+
18
38
  describe 'title/artist/album attributes' do
19
39
  let(:parsed) { subject.call(File.open(fpath, 'rb')) }
20
40
 
@@ -67,7 +87,7 @@ describe FormatParser::MP3Parser do
67
87
 
68
88
  large_syncsfe_size = [ID3Tag::SynchsafeInteger.encode(more_bytes_than_permitted)].pack('N')
69
89
  prepped = StringIO.new(
70
- 'ID3' + "\x43\x00".b + "\x00".b + large_syncsfe_size + gunk
90
+ 'ID3' + "\x03\x00".b + "\x00".b + large_syncsfe_size + gunk
71
91
  )
72
92
 
73
93
  expect(ID3Tag).not_to receive(:read)
@@ -90,6 +110,14 @@ describe FormatParser::MP3Parser do
90
110
  expect(parsed.title).to eq('test')
91
111
  end
92
112
 
113
+ it 'reads the mpeg frames correctly' do
114
+ fpath = fixtures_dir + '/MP3/test_read_frames.mp3'
115
+
116
+ parsed = subject.call(File.open(fpath, 'rb'))
117
+
118
+ expect(parsed.audio_sample_rate_hz). to eq(48000)
119
+ end
120
+
93
121
  it 'parses the Cassy MP3' do
94
122
  fpath = fixtures_dir + '/MP3/Cassy.mp3'
95
123
  parsed = subject.call(File.open(fpath, 'rb'))
@@ -130,6 +158,14 @@ describe FormatParser::MP3Parser do
130
158
  }.to raise_error(FormatParser::IOUtils::InvalidRead)
131
159
  end
132
160
 
161
+ it 'supports id3 v2.4.x' do
162
+ fpath = fixtures_dir + '/MP3/id3v24.mp3'
163
+
164
+ parsed = subject.call(File.open(fpath, 'rb'))
165
+
166
+ expect(parsed.artist). to eq('wetransfer')
167
+ end
168
+
133
169
  describe '#as_json' do
134
170
  it 'converts all hash keys to string when stringify_keys: true' do
135
171
  fpath = fixtures_dir + '/MP3/Cassy.mp3'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.24.0
4
+ version: 0.25.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2020-09-15 00:00:00.000000000 Z
12
+ date: 2020-10-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -34,7 +34,7 @@ dependencies:
34
34
  version: '1'
35
35
  - - ">="
36
36
  - !ruby/object:Gem::Version
37
- version: 1.3.7
37
+ version: 1.3.8
38
38
  type: :runtime
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -44,21 +44,21 @@ dependencies:
44
44
  version: '1'
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: 1.3.7
47
+ version: 1.3.8
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: id3tag
50
50
  requirement: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0.13'
54
+ version: '0.14'
55
55
  type: :runtime
56
56
  prerelease: false
57
57
  version_requirements: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0.13'
61
+ version: '0.14'
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: faraday
64
64
  requirement: !ruby/object:Gem::Requirement
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
292
292
  - !ruby/object:Gem::Version
293
293
  version: '0'
294
294
  requirements: []
295
- rubygems_version: 3.0.6
295
+ rubygems_version: 3.1.2
296
296
  signing_key:
297
297
  specification_version: 4
298
298
  summary: A library for efficient parsing of file metadata