format_parser 0.23.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dddac3718ccf02324f4632adc68ad87d08ede00c3bbe64d85689dc8a6a06ad3a
4
- data.tar.gz: 3ca9fb36416dffbd6fd1825f15ee0fe9b590633c959a165018f0c60f8d965361
3
+ metadata.gz: c24092543b6c98c713b07a45c3e3a2990332858397e11f6709470c32343a62f3
4
+ data.tar.gz: 0222b05ddfa1efa7cee364db7ad61ae7d44806b9622d6d6012fbb50e1b2e8138
5
5
  SHA512:
6
- metadata.gz: 9193257f175a36087bc4f780659ce4e7bdefbbec456f38015b588ef9b378c71e5601c2d5de0f801297e6e968d70ed3e8db4a1fb0d8a2c78b90f8f9b328cfc43a
7
- data.tar.gz: f181cbae4d8261e05ad3ed4787f9376c3738ba9487258b7d3885dcebe6166409578c82e2e5136c19b47b61ba4fc0d730f752b71a14e19cc6cea79ecdedc357e6
6
+ metadata.gz: 67a3a64166115ef70b2043c05e9c105c2939752b1de34086421a2b96311880673d1c668927eecf904e06db368cc9979218d666004b59a187f52e569f47e9d2a3
7
+ data.tar.gz: 5f4034dbcc2a92cb4908c6609465fbb81ec5011f42f22e9c5477b72a26ee4b59bbaa1191a13a5405d71f68ac8b8c0ceac652befd9eb2c1ea9fc5af000b221828
@@ -1,3 +1,20 @@
1
+ ## 0.25.0
2
+ * MP3: add support for id3 v2.4.x
3
+ * JPEG: Update gem exifr to 1.3.8 to fix a bug
4
+
5
+ ## 0.24.2
6
+ * Update gem id3tag to 0.14.0 to fix MP3 issues
7
+
8
+ ## 0.24.1
9
+ * Fix MP3 frames reading to jump correctly to the next bytes
10
+
11
+ ## 0.24.0
12
+ * The TIFF parser will now return :arw as format for Sony ARW files instead of :tif so that the caller can decide whether it
13
+ wants to deal with RAW processing or not
14
+
15
+ ## 0.23.1
16
+ * Updated gem exifr to fix problems related to jpeg files from Olympus microscopes, which often have bad thumbnail data
17
+
1
18
  ## 0.23.0
2
19
  * Add ActiveStorage analyzer which can analyze ActiveStorage blobs. Enable it by setting
3
20
  `config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer`
@@ -31,8 +31,8 @@ Gem::Specification.new do |spec|
31
31
  spec.require_paths = ['lib']
32
32
 
33
33
  spec.add_dependency 'ks', '~> 0.0'
34
- spec.add_dependency 'exifr', '~> 1', '>= 1.3.4'
35
- spec.add_dependency 'id3tag', '~> 0.13'
34
+ spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
35
+ spec.add_dependency 'id3tag', '~> 0.14'
36
36
  spec.add_dependency 'faraday', '~> 0.13'
37
37
  spec.add_dependency 'measurometer', '~> 1'
38
38
 
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.23.0'
2
+ VERSION = '0.25.0'
3
3
  end
@@ -20,13 +20,14 @@ class FormatParser::MP3Parser
20
20
 
21
21
  # We limit the number of MPEG frames we scan
22
22
  # to obtain our duration estimation
23
- MAX_FRAMES_TO_SCAN = 128
23
+ MAX_FRAMES_TO_SCAN = 500
24
24
 
25
25
  # Default frame size for mp3
26
26
  SAMPLES_PER_FRAME = 1152
27
27
 
28
28
  # For some edge cases
29
29
  ZIP_LOCAL_ENTRY_SIGNATURE = "PK\x03\x04\x14\x00".b
30
+ PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
30
31
 
31
32
  # Wraps the Tag object returned by ID3Tag in such
32
33
  # a way that a usable JSON representation gets
@@ -60,8 +61,12 @@ class FormatParser::MP3Parser
60
61
  # To avoid having that happen, we check for the PKZIP signature -
61
62
  # local entry header signature - at the very start of the file.
62
63
  # If the file is too small safe_read will fail too and the parser
63
- # will terminate here.
64
- return if safe_read(io, 6) == ZIP_LOCAL_ENTRY_SIGNATURE
64
+ # will terminate here. Same with PNGs. In the future
65
+ # we should implement "confidence" for MP3 as of all our formats
66
+ # it is by far the most lax.
67
+ header = safe_read(io, 8)
68
+ return if header.start_with?(ZIP_LOCAL_ENTRY_SIGNATURE)
69
+ return if header.start_with?(PNG_HEADER_BYTES)
65
70
 
66
71
  # Read all the ID3 tags (or at least attempt to)
67
72
  io.seek(0)
@@ -131,27 +136,28 @@ class FormatParser::MP3Parser
131
136
  # if you have a minute. https://pypi.python.org/pypi/tinytag
132
137
  def parse_mpeg_frames(io)
133
138
  mpeg_frames = []
139
+ bytes_to_read = 4
134
140
 
135
141
  MAX_FRAMES_TO_SCAN.times do |frame_i|
136
142
  # Read through until we can latch onto the 11 sync bits. Read in 4-byte
137
143
  # increments to save on read() calls
138
- data = io.read(4)
144
+ data = io.read(bytes_to_read)
139
145
 
140
146
  # If we are at EOF - stop iterating
141
- break unless data && data.bytesize == 4
147
+ break unless data && data.bytesize == bytes_to_read
142
148
 
143
149
  # Look for the sync pattern. It can be either the last byte being 0xFF,
144
150
  # or any of the 2 bytes in sequence being 0xFF and > 0xF0.
145
151
  four_bytes = data.unpack('C4')
146
152
  seek_jmp = sync_bytes_offset_in_4_byte_seq(four_bytes)
147
153
  if seek_jmp > 0
148
- io.seek(io.pos + seek_jmp)
154
+ io.seek(io.pos - bytes_to_read + seek_jmp)
149
155
  next
150
156
  end
151
157
 
152
158
  # Once we are past that stage we have latched onto a sync frame header
153
159
  sync, conf, bitrate_freq, rest = four_bytes
154
- frame_detail = parse_mpeg_frame_header(io.pos - 4, sync, conf, bitrate_freq, rest)
160
+ frame_detail = parse_mpeg_frame_header(io.pos - bytes_to_read, sync, conf, bitrate_freq, rest)
155
161
  mpeg_frames << frame_detail
156
162
 
157
163
  # There might be a xing header in the first frame that contains
@@ -166,7 +172,7 @@ class FormatParser::MP3Parser
166
172
  end
167
173
  end
168
174
  if frame_detail.frame_length > 1 # jump over current frame body
169
- io.seek(io.pos + frame_detail.frame_length - 4)
175
+ io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
170
176
  end
171
177
  end
172
178
  [nil, mpeg_frames]
@@ -1,6 +1,8 @@
1
1
  module FormatParser::MP3Parser::ID3Extraction
2
2
  ID3V1_TAG_SIZE_BYTES = 128
3
- ID3V2_TAG_VERSIONS = ["\x43\x00".b, "\x03\x00".b, "\x02\x00".b]
3
+ # ID3v2 minor versions we accept (2.2.x, 2.3.x and 2.4.x), all of which are supported by the gem id3tag
4
+ # see https://id3.org/Developer%20Information for more details of each version
5
+ ID3V2_MINOR_TAG_VERSIONS = [2, 3, 4]
4
6
  MAX_SIZE_FOR_ID3V2 = 1 * 1024 * 1024
5
7
 
6
8
  extend FormatParser::IOUtils
@@ -22,7 +24,7 @@ module FormatParser::MP3Parser::ID3Extraction
22
24
  io.seek(0) # Only support header ID3v2
23
25
  header = parse_id3_v2_header(io)
24
26
  return unless header[:tag] == 'ID3' && header[:size] > 0
25
- return unless ID3V2_TAG_VERSIONS.include?(header[:version])
27
+ return unless ID3V2_MINOR_TAG_VERSIONS.include?(header[:version].unpack('C').first)
26
28
 
27
29
  id3_tag_size = io.pos + header[:size]
28
30
 
@@ -26,7 +26,7 @@ class FormatParser::TIFFParser
26
26
  h = exif_data.height || exif_data.pixel_y_dimension
27
27
 
28
28
  FormatParser::Image.new(
29
- format: :tif,
29
+ format: arw?(exif_data) ? :arw : :tif, # Specify format as arw for Sony ARW format images, else tif
30
30
  width_px: w,
31
31
  height_px: h,
32
32
  display_width_px: exif_data.rotated? ? h : w,
@@ -43,5 +43,11 @@ class FormatParser::TIFFParser
43
43
  safe_read(io, 2) == 'CR'
44
44
  end
45
45
 
46
+ # Similar to how exiftool determines the image type as ARW, we are implementing a check here
47
+ # https://github.com/exiftool/exiftool/blob/e969456372fbaf4b980fea8bb094d71033ac8bf7/lib/Image/ExifTool/Exif.pm#L929
48
+ def arw?(exif_data)
49
+ exif_data.compression == 6 && exif_data.new_subfile_type == 1 && exif_data.make == 'SONY'
50
+ end
51
+
46
52
  FormatParser.register_parser new, natures: :image, formats: :tif
47
53
  end
@@ -15,6 +15,12 @@ describe FormatParser::MP3Parser do
15
15
  expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
16
16
  end
17
17
 
18
+ it 'does not misdetect a PNG' do
19
+ fpath = fixtures_dir + '/PNG/anim.png'
20
+ parsed = subject.call(File.open(fpath, 'rb'))
21
+ expect(parsed).to be_nil
22
+ end
23
+
18
24
  describe 'title/artist/album attributes' do
19
25
  let(:parsed) { subject.call(File.open(fpath, 'rb')) }
20
26
 
@@ -67,7 +73,7 @@ describe FormatParser::MP3Parser do
67
73
 
68
74
  large_syncsfe_size = [ID3Tag::SynchsafeInteger.encode(more_bytes_than_permitted)].pack('N')
69
75
  prepped = StringIO.new(
70
- 'ID3' + "\x43\x00".b + "\x00".b + large_syncsfe_size + gunk
76
+ 'ID3' + "\x03\x00".b + "\x00".b + large_syncsfe_size + gunk
71
77
  )
72
78
 
73
79
  expect(ID3Tag).not_to receive(:read)
@@ -90,6 +96,14 @@ describe FormatParser::MP3Parser do
90
96
  expect(parsed.title).to eq('test')
91
97
  end
92
98
 
99
+ it 'reads the mpeg frames correctly' do
100
+ fpath = fixtures_dir + '/MP3/test_read_frames.mp3'
101
+
102
+ parsed = subject.call(File.open(fpath, 'rb'))
103
+
104
+ expect(parsed.audio_sample_rate_hz). to eq(48000)
105
+ end
106
+
93
107
  it 'parses the Cassy MP3' do
94
108
  fpath = fixtures_dir + '/MP3/Cassy.mp3'
95
109
  parsed = subject.call(File.open(fpath, 'rb'))
@@ -130,6 +144,14 @@ describe FormatParser::MP3Parser do
130
144
  }.to raise_error(FormatParser::IOUtils::InvalidRead)
131
145
  end
132
146
 
147
+ it 'supports id3 v2.4.x' do
148
+ fpath = fixtures_dir + '/MP3/id3v24.mp3'
149
+
150
+ parsed = subject.call(File.open(fpath, 'rb'))
151
+
152
+ expect(parsed.artist). to eq('wetransfer')
153
+ end
154
+
133
155
  describe '#as_json' do
134
156
  it 'converts all hash keys to string when stringify_keys: true' do
135
157
  fpath = fixtures_dir + '/MP3/Cassy.mp3'
@@ -47,12 +47,15 @@ describe FormatParser::TIFFParser do
47
47
  expect(parsed.intrinsics[:exif]).not_to be_nil
48
48
  end
49
49
 
50
- it 'correctly extracts dimensions for a Sony ARW fixture' do
50
+ it 'parses Sony ARW fixture as arw format file' do
51
51
  arw_path = fixtures_dir + '/ARW/RAW_SONY_ILCE-7RM2.ARW'
52
52
 
53
53
  parsed = subject.call(File.open(arw_path, 'rb'))
54
54
 
55
55
  expect(parsed).not_to be_nil
56
+ expect(parsed.nature).to eq(:image)
57
+ expect(parsed.format).to eq(:arw)
58
+
56
59
  expect(parsed.width_px).to eq(7952)
57
60
  expect(parsed.height_px).to eq(5304)
58
61
  expect(parsed.intrinsics[:exif]).not_to be_nil
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.23.0
4
+ version: 0.25.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2020-09-08 00:00:00.000000000 Z
12
+ date: 2020-09-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -34,7 +34,7 @@ dependencies:
34
34
  version: '1'
35
35
  - - ">="
36
36
  - !ruby/object:Gem::Version
37
- version: 1.3.4
37
+ version: 1.3.8
38
38
  type: :runtime
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -44,21 +44,21 @@ dependencies:
44
44
  version: '1'
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: 1.3.4
47
+ version: 1.3.8
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: id3tag
50
50
  requirement: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0.13'
54
+ version: '0.14'
55
55
  type: :runtime
56
56
  prerelease: false
57
57
  version_requirements: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0.13'
61
+ version: '0.14'
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: faraday
64
64
  requirement: !ruby/object:Gem::Requirement
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
292
292
  - !ruby/object:Gem::Version
293
293
  version: '0'
294
294
  requirements: []
295
- rubygems_version: 3.0.3
295
+ rubygems_version: 3.1.2
296
296
  signing_key:
297
297
  specification_version: 4
298
298
  summary: A library for efficient parsing of file metadata