format_parser 0.23.0 → 0.25.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dddac3718ccf02324f4632adc68ad87d08ede00c3bbe64d85689dc8a6a06ad3a
4
- data.tar.gz: 3ca9fb36416dffbd6fd1825f15ee0fe9b590633c959a165018f0c60f8d965361
3
+ metadata.gz: c24092543b6c98c713b07a45c3e3a2990332858397e11f6709470c32343a62f3
4
+ data.tar.gz: 0222b05ddfa1efa7cee364db7ad61ae7d44806b9622d6d6012fbb50e1b2e8138
5
5
  SHA512:
6
- metadata.gz: 9193257f175a36087bc4f780659ce4e7bdefbbec456f38015b588ef9b378c71e5601c2d5de0f801297e6e968d70ed3e8db4a1fb0d8a2c78b90f8f9b328cfc43a
7
- data.tar.gz: f181cbae4d8261e05ad3ed4787f9376c3738ba9487258b7d3885dcebe6166409578c82e2e5136c19b47b61ba4fc0d730f752b71a14e19cc6cea79ecdedc357e6
6
+ metadata.gz: 67a3a64166115ef70b2043c05e9c105c2939752b1de34086421a2b96311880673d1c668927eecf904e06db368cc9979218d666004b59a187f52e569f47e9d2a3
7
+ data.tar.gz: 5f4034dbcc2a92cb4908c6609465fbb81ec5011f42f22e9c5477b72a26ee4b59bbaa1191a13a5405d71f68ac8b8c0ceac652befd9eb2c1ea9fc5af000b221828
@@ -1,3 +1,20 @@
1
+ ## 0.25.0
2
+ * MP3: add support for id3 v2.4.x
3
+ * JPEG: Update gem exifr to 1.3.8 to fix a bug
4
+
5
+ ## 0.24.2
6
+ * Update gem id3tag to 0.14.0 to fix MP3 issues
7
+
8
+ ## 0.24.1
9
+ * Fix MP3 frames reading to jump correctly to the next bytes
10
+
11
+ ## 0.24.0
12
+ * The TIFF parser will now return :arw as format for Sony ARW files instead of :tif so that the caller can decide whether it
13
+ wants to deal with RAW processing or not
14
+
15
+ ## 0.23.1
16
+ * Updated gem exifr to fix problems related to jpeg files from Olympus microscopes, which often have bad thumbnail data
17
+
1
18
  ## 0.23.0
2
19
  * Add ActiveStorage analyzer which can analyze ActiveStorage blobs. Enable it by setting
3
20
  `config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer`
@@ -31,8 +31,8 @@ Gem::Specification.new do |spec|
31
31
  spec.require_paths = ['lib']
32
32
 
33
33
  spec.add_dependency 'ks', '~> 0.0'
34
- spec.add_dependency 'exifr', '~> 1', '>= 1.3.4'
35
- spec.add_dependency 'id3tag', '~> 0.13'
34
+ spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
35
+ spec.add_dependency 'id3tag', '~> 0.14'
36
36
  spec.add_dependency 'faraday', '~> 0.13'
37
37
  spec.add_dependency 'measurometer', '~> 1'
38
38
 
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.23.0'
2
+ VERSION = '0.25.0'
3
3
  end
@@ -20,13 +20,14 @@ class FormatParser::MP3Parser
20
20
 
21
21
  # We limit the number of MPEG frames we scan
22
22
  # to obtain our duration estimation
23
- MAX_FRAMES_TO_SCAN = 128
23
+ MAX_FRAMES_TO_SCAN = 500
24
24
 
25
25
  # Default frame size for mp3
26
26
  SAMPLES_PER_FRAME = 1152
27
27
 
28
28
  # For some edge cases
29
29
  ZIP_LOCAL_ENTRY_SIGNATURE = "PK\x03\x04\x14\x00".b
30
+ PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
30
31
 
31
32
  # Wraps the Tag object returned by ID3Tag in such
32
33
  # a way that a usable JSON representation gets
@@ -60,8 +61,12 @@ class FormatParser::MP3Parser
60
61
  # To avoid having that happen, we check for the PKZIP signature -
61
62
  # local entry header signature - at the very start of the file.
62
63
  # If the file is too small safe_read will fail too and the parser
63
- # will terminate here.
64
- return if safe_read(io, 6) == ZIP_LOCAL_ENTRY_SIGNATURE
64
+ # will terminate here. Same with PNGs. In the future
65
+ # we should implement "confidence" for MP3 as of all our formats
66
+ # it is by far the most lax.
67
+ header = safe_read(io, 8)
68
+ return if header.start_with?(ZIP_LOCAL_ENTRY_SIGNATURE)
69
+ return if header.start_with?(PNG_HEADER_BYTES)
65
70
 
66
71
  # Read all the ID3 tags (or at least attempt to)
67
72
  io.seek(0)
@@ -131,27 +136,28 @@ class FormatParser::MP3Parser
131
136
  # if you have a minute. https://pypi.python.org/pypi/tinytag
132
137
  def parse_mpeg_frames(io)
133
138
  mpeg_frames = []
139
+ bytes_to_read = 4
134
140
 
135
141
  MAX_FRAMES_TO_SCAN.times do |frame_i|
136
142
  # Read through until we can latch onto the 11 sync bits. Read in 4-byte
137
143
  # increments to save on read() calls
138
- data = io.read(4)
144
+ data = io.read(bytes_to_read)
139
145
 
140
146
  # If we are at EOF - stop iterating
141
- break unless data && data.bytesize == 4
147
+ break unless data && data.bytesize == bytes_to_read
142
148
 
143
149
  # Look for the sync pattern. It can be either the last byte being 0xFF,
144
150
  # or any of the 2 bytes in sequence being 0xFF and > 0xF0.
145
151
  four_bytes = data.unpack('C4')
146
152
  seek_jmp = sync_bytes_offset_in_4_byte_seq(four_bytes)
147
153
  if seek_jmp > 0
148
- io.seek(io.pos + seek_jmp)
154
+ io.seek(io.pos - bytes_to_read + seek_jmp)
149
155
  next
150
156
  end
151
157
 
152
158
  # Once we are past that stage we have latched onto a sync frame header
153
159
  sync, conf, bitrate_freq, rest = four_bytes
154
- frame_detail = parse_mpeg_frame_header(io.pos - 4, sync, conf, bitrate_freq, rest)
160
+ frame_detail = parse_mpeg_frame_header(io.pos - bytes_to_read, sync, conf, bitrate_freq, rest)
155
161
  mpeg_frames << frame_detail
156
162
 
157
163
  # There might be a xing header in the first frame that contains
@@ -166,7 +172,7 @@ class FormatParser::MP3Parser
166
172
  end
167
173
  end
168
174
  if frame_detail.frame_length > 1 # jump over current frame body
169
- io.seek(io.pos + frame_detail.frame_length - 4)
175
+ io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
170
176
  end
171
177
  end
172
178
  [nil, mpeg_frames]
@@ -1,6 +1,8 @@
1
1
  module FormatParser::MP3Parser::ID3Extraction
2
2
  ID3V1_TAG_SIZE_BYTES = 128
3
- ID3V2_TAG_VERSIONS = ["\x43\x00".b, "\x03\x00".b, "\x02\x00".b]
3
+ # it supports 2.4.x, 2.3.x, 2.2.x which are supported by the gem id3tag
4
+ # see https://id3.org/Developer%20Information for more details of each version
5
+ ID3V2_MINOR_TAG_VERSIONS = [2, 3, 4]
4
6
  MAX_SIZE_FOR_ID3V2 = 1 * 1024 * 1024
5
7
 
6
8
  extend FormatParser::IOUtils
@@ -22,7 +24,7 @@ module FormatParser::MP3Parser::ID3Extraction
22
24
  io.seek(0) # Only support header ID3v2
23
25
  header = parse_id3_v2_header(io)
24
26
  return unless header[:tag] == 'ID3' && header[:size] > 0
25
- return unless ID3V2_TAG_VERSIONS.include?(header[:version])
27
+ return unless ID3V2_MINOR_TAG_VERSIONS.include?(header[:version].unpack('C').first)
26
28
 
27
29
  id3_tag_size = io.pos + header[:size]
28
30
 
@@ -26,7 +26,7 @@ class FormatParser::TIFFParser
26
26
  h = exif_data.height || exif_data.pixel_y_dimension
27
27
 
28
28
  FormatParser::Image.new(
29
- format: :tif,
29
+ format: arw?(exif_data) ? :arw : :tif, # Specify format as arw for Sony ARW format images, else tif
30
30
  width_px: w,
31
31
  height_px: h,
32
32
  display_width_px: exif_data.rotated? ? h : w,
@@ -43,5 +43,11 @@ class FormatParser::TIFFParser
43
43
  safe_read(io, 2) == 'CR'
44
44
  end
45
45
 
46
+ # Similar to how exiftool determines the image type as ARW, we are implementing a check here
47
+ # https://github.com/exiftool/exiftool/blob/e969456372fbaf4b980fea8bb094d71033ac8bf7/lib/Image/ExifTool/Exif.pm#L929
48
+ def arw?(exif_data)
49
+ exif_data.compression == 6 && exif_data.new_subfile_type == 1 && exif_data.make == 'SONY'
50
+ end
51
+
46
52
  FormatParser.register_parser new, natures: :image, formats: :tif
47
53
  end
@@ -15,6 +15,12 @@ describe FormatParser::MP3Parser do
15
15
  expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
16
16
  end
17
17
 
18
+ it 'does not misdetect a PNG' do
19
+ fpath = fixtures_dir + '/PNG/anim.png'
20
+ parsed = subject.call(File.open(fpath, 'rb'))
21
+ expect(parsed).to be_nil
22
+ end
23
+
18
24
  describe 'title/artist/album attributes' do
19
25
  let(:parsed) { subject.call(File.open(fpath, 'rb')) }
20
26
 
@@ -67,7 +73,7 @@ describe FormatParser::MP3Parser do
67
73
 
68
74
  large_syncsfe_size = [ID3Tag::SynchsafeInteger.encode(more_bytes_than_permitted)].pack('N')
69
75
  prepped = StringIO.new(
70
- 'ID3' + "\x43\x00".b + "\x00".b + large_syncsfe_size + gunk
76
+ 'ID3' + "\x03\x00".b + "\x00".b + large_syncsfe_size + gunk
71
77
  )
72
78
 
73
79
  expect(ID3Tag).not_to receive(:read)
@@ -90,6 +96,14 @@ describe FormatParser::MP3Parser do
90
96
  expect(parsed.title).to eq('test')
91
97
  end
92
98
 
99
+ it 'reads the mpeg frames correctly' do
100
+ fpath = fixtures_dir + '/MP3/test_read_frames.mp3'
101
+
102
+ parsed = subject.call(File.open(fpath, 'rb'))
103
+
104
+ expect(parsed.audio_sample_rate_hz). to eq(48000)
105
+ end
106
+
93
107
  it 'parses the Cassy MP3' do
94
108
  fpath = fixtures_dir + '/MP3/Cassy.mp3'
95
109
  parsed = subject.call(File.open(fpath, 'rb'))
@@ -130,6 +144,14 @@ describe FormatParser::MP3Parser do
130
144
  }.to raise_error(FormatParser::IOUtils::InvalidRead)
131
145
  end
132
146
 
147
+ it 'supports id3 v2.4.x' do
148
+ fpath = fixtures_dir + '/MP3/id3v24.mp3'
149
+
150
+ parsed = subject.call(File.open(fpath, 'rb'))
151
+
152
+ expect(parsed.artist). to eq('wetransfer')
153
+ end
154
+
133
155
  describe '#as_json' do
134
156
  it 'converts all hash keys to string when stringify_keys: true' do
135
157
  fpath = fixtures_dir + '/MP3/Cassy.mp3'
@@ -47,12 +47,15 @@ describe FormatParser::TIFFParser do
47
47
  expect(parsed.intrinsics[:exif]).not_to be_nil
48
48
  end
49
49
 
50
- it 'correctly extracts dimensions for a Sony ARW fixture' do
50
+ it 'parses Sony ARW fixture as arw format file' do
51
51
  arw_path = fixtures_dir + '/ARW/RAW_SONY_ILCE-7RM2.ARW'
52
52
 
53
53
  parsed = subject.call(File.open(arw_path, 'rb'))
54
54
 
55
55
  expect(parsed).not_to be_nil
56
+ expect(parsed.nature).to eq(:image)
57
+ expect(parsed.format).to eq(:arw)
58
+
56
59
  expect(parsed.width_px).to eq(7952)
57
60
  expect(parsed.height_px).to eq(5304)
58
61
  expect(parsed.intrinsics[:exif]).not_to be_nil
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.23.0
4
+ version: 0.25.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2020-09-08 00:00:00.000000000 Z
12
+ date: 2020-09-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -34,7 +34,7 @@ dependencies:
34
34
  version: '1'
35
35
  - - ">="
36
36
  - !ruby/object:Gem::Version
37
- version: 1.3.4
37
+ version: 1.3.8
38
38
  type: :runtime
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -44,21 +44,21 @@ dependencies:
44
44
  version: '1'
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: 1.3.4
47
+ version: 1.3.8
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: id3tag
50
50
  requirement: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0.13'
54
+ version: '0.14'
55
55
  type: :runtime
56
56
  prerelease: false
57
57
  version_requirements: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0.13'
61
+ version: '0.14'
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: faraday
64
64
  requirement: !ruby/object:Gem::Requirement
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
292
292
  - !ruby/object:Gem::Version
293
293
  version: '0'
294
294
  requirements: []
295
- rubygems_version: 3.0.3
295
+ rubygems_version: 3.1.2
296
296
  signing_key:
297
297
  specification_version: 4
298
298
  summary: A library for efficient parsing of file metadata