format_parser 0.24.0 → 0.24.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cef6e8b3f4c3914ef022e7ff0e92d38357670315c0804efe8a2967f7f4228f53
4
- data.tar.gz: '09a817b4fc688a3695800f4602a5901613b591d7a30ba26b86988720da7e13a9'
3
+ metadata.gz: c76879d955fbe7598ee7ccdfc663876a29621e1fe4e54721edbba19d8e5f9c81
4
+ data.tar.gz: 7cd4161abb24e1a195fec86dc6c9ced63cb642832edf4d4d9c33129208fdf8b4
5
5
  SHA512:
6
- metadata.gz: 979804c2a7381d09e1f4952396321e185a917587d44409b1f7d45be0e05b466601eb7281e3d463785c6c18d79ba48ff3858bcd60c0f6791e2d394c4fa40ed684
7
- data.tar.gz: a58cd8cab20fa8b8b7886a36479bd03fb6f5a4afbf9435d09318da502de50f6f257e45b012a56c79b4309dbaaf96feedb6b2b12016979bffa1336c528a591e3b
6
+ metadata.gz: 6125db42f078e6e7d4fb0a9111ce29d6750a6b5fedcbfd9a7b28f66fca8fbf59e513f2b91cccc7a2409744dd1a49b7e60361bc7af0c8414bde42e9fd535941e9
7
+ data.tar.gz: f285a67739a9722a77aa9871e0d9b553a463dd22090c3728c8f97898740b303d91c214f2565d679895325e2d45399be311550e4263554d5e056d2ce528108374
@@ -1,3 +1,6 @@
1
+ ## 0.24.1
2
+ * Fix MP3 frames reading to jump correctly to the next bytes
3
+
1
4
  ## 0.24.0
2
5
  * The TIFF parser will now return :arw as format for Sony ARW files instead of :tif so that the caller can decide whether it
3
6
  wants to deal with RAW processing or not
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.24.0'
2
+ VERSION = '0.24.1'
3
3
  end
@@ -20,13 +20,14 @@ class FormatParser::MP3Parser
20
20
 
21
21
  # We limit the number of MPEG frames we scan
22
22
  # to obtain our duration estimation
23
- MAX_FRAMES_TO_SCAN = 128
23
+ MAX_FRAMES_TO_SCAN = 500
24
24
 
25
25
  # Default frame size for mp3
26
26
  SAMPLES_PER_FRAME = 1152
27
27
 
28
28
  # For some edge cases
29
29
  ZIP_LOCAL_ENTRY_SIGNATURE = "PK\x03\x04\x14\x00".b
30
+ PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
30
31
 
31
32
  # Wraps the Tag object returned by ID3Tag in such
32
33
  # a way that a usable JSON representation gets
@@ -60,8 +61,12 @@ class FormatParser::MP3Parser
60
61
  # To avoid having that happen, we check for the PKZIP signature -
61
62
  # local entry header signature - at the very start of the file.
62
63
  # If the file is too small safe_read will fail too and the parser
63
- # will terminate here.
64
- return if safe_read(io, 6) == ZIP_LOCAL_ENTRY_SIGNATURE
64
+ # will terminate here. Same with PNGs. In the future
65
+ # we should implement "confidence" for MP3 as of all our formats
66
+ # it is by far the most lax.
67
+ header = safe_read(io, 8)
68
+ return if header.start_with?(ZIP_LOCAL_ENTRY_SIGNATURE)
69
+ return if header.start_with?(PNG_HEADER_BYTES)
65
70
 
66
71
  # Read all the ID3 tags (or at least attempt to)
67
72
  io.seek(0)
@@ -131,27 +136,28 @@ class FormatParser::MP3Parser
131
136
  # if you have a minute. https://pypi.python.org/pypi/tinytag
132
137
  def parse_mpeg_frames(io)
133
138
  mpeg_frames = []
139
+ bytes_to_read = 4
134
140
 
135
141
  MAX_FRAMES_TO_SCAN.times do |frame_i|
136
142
  # Read through until we can latch onto the 11 sync bits. Read in 4-byte
137
143
  # increments to save on read() calls
138
- data = io.read(4)
144
+ data = io.read(bytes_to_read)
139
145
 
140
146
  # If we are at EOF - stop iterating
141
- break unless data && data.bytesize == 4
147
+ break unless data && data.bytesize == bytes_to_read
142
148
 
143
149
  # Look for the sync pattern. It can be either the last byte being 0xFF,
144
150
  # or any of the 2 bytes in sequence being 0xFF and > 0xF0.
145
151
  four_bytes = data.unpack('C4')
146
152
  seek_jmp = sync_bytes_offset_in_4_byte_seq(four_bytes)
147
153
  if seek_jmp > 0
148
- io.seek(io.pos + seek_jmp)
154
+ io.seek(io.pos - bytes_to_read + seek_jmp)
149
155
  next
150
156
  end
151
157
 
152
158
  # Once we are past that stage we have latched onto a sync frame header
153
159
  sync, conf, bitrate_freq, rest = four_bytes
154
- frame_detail = parse_mpeg_frame_header(io.pos - 4, sync, conf, bitrate_freq, rest)
160
+ frame_detail = parse_mpeg_frame_header(io.pos - bytes_to_read, sync, conf, bitrate_freq, rest)
155
161
  mpeg_frames << frame_detail
156
162
 
157
163
  # There might be a xing header in the first frame that contains
@@ -166,7 +172,7 @@ class FormatParser::MP3Parser
166
172
  end
167
173
  end
168
174
  if frame_detail.frame_length > 1 # jump over current frame body
169
- io.seek(io.pos + frame_detail.frame_length - 4)
175
+ io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
170
176
  end
171
177
  end
172
178
  [nil, mpeg_frames]
@@ -15,6 +15,12 @@ describe FormatParser::MP3Parser do
15
15
  expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
16
16
  end
17
17
 
18
+ it 'does not misdetect a PNG' do
19
+ fpath = fixtures_dir + '/PNG/anim.png'
20
+ parsed = subject.call(File.open(fpath, 'rb'))
21
+ expect(parsed).to be_nil
22
+ end
23
+
18
24
  describe 'title/artist/album attributes' do
19
25
  let(:parsed) { subject.call(File.open(fpath, 'rb')) }
20
26
 
@@ -90,6 +96,14 @@ describe FormatParser::MP3Parser do
90
96
  expect(parsed.title).to eq('test')
91
97
  end
92
98
 
99
+ it 'reads the mpeg frames correctly' do
100
+ fpath = fixtures_dir + '/MP3/test_read_frames.mp3'
101
+
102
+ parsed = subject.call(File.open(fpath, 'rb'))
103
+
104
+ expect(parsed.audio_sample_rate_hz). to eq(48000)
105
+ end
106
+
93
107
  it 'parses the Cassy MP3' do
94
108
  fpath = fixtures_dir + '/MP3/Cassy.mp3'
95
109
  parsed = subject.call(File.open(fpath, 'rb'))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.24.0
4
+ version: 0.24.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2020-09-15 00:00:00.000000000 Z
12
+ date: 2020-09-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
292
292
  - !ruby/object:Gem::Version
293
293
  version: '0'
294
294
  requirements: []
295
- rubygems_version: 3.0.6
295
+ rubygems_version: 3.0.3
296
296
  signing_key:
297
297
  specification_version: 4
298
298
  summary: A library for efficient parsing of file metadata