format_parser 0.23.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/format_parser.gemspec +2 -2
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/mp3_parser.rb +14 -8
- data/lib/parsers/mp3_parser/id3_extraction.rb +4 -2
- data/lib/parsers/tiff_parser.rb +7 -1
- data/spec/parsers/mp3_parser_spec.rb +23 -1
- data/spec/parsers/tiff_parser_spec.rb +4 -1
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c24092543b6c98c713b07a45c3e3a2990332858397e11f6709470c32343a62f3
|
4
|
+
data.tar.gz: 0222b05ddfa1efa7cee364db7ad61ae7d44806b9622d6d6012fbb50e1b2e8138
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 67a3a64166115ef70b2043c05e9c105c2939752b1de34086421a2b96311880673d1c668927eecf904e06db368cc9979218d666004b59a187f52e569f47e9d2a3
|
7
|
+
data.tar.gz: 5f4034dbcc2a92cb4908c6609465fbb81ec5011f42f22e9c5477b72a26ee4b59bbaa1191a13a5405d71f68ac8b8c0ceac652befd9eb2c1ea9fc5af000b221828
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
## 0.25.0
|
2
|
+
* MP3: add support for id3 v2.4.x
|
3
|
+
* JPEG: Update gem exifr to 1.3.8 to fix a bug
|
4
|
+
|
5
|
+
## 0.24.2
|
6
|
+
* Update gem id3tag to 0.14.0 to fix MP3 issues
|
7
|
+
|
8
|
+
## 0.24.1
|
9
|
+
* Fix MP3 frames reading to jump correctly to the next bytes
|
10
|
+
|
11
|
+
## 0.24.0
|
12
|
+
* The TIFF parser will now return :arw as format for Sony ARW files instead of :tif so that the caller can decide whether it
|
13
|
+
wants to deal with RAW processing or not
|
14
|
+
|
15
|
+
## 0.23.1
|
16
|
+
* Updated gem exifr to fix problems related to jpeg files from Olympus microscopes, which often have bad thumbnail data
|
17
|
+
|
1
18
|
## 0.23.0
|
2
19
|
* Add ActiveStorage analyzer which can analyze ActiveStorage blobs. Enable it by setting
|
3
20
|
`config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer`
|
data/format_parser.gemspec
CHANGED
@@ -31,8 +31,8 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
33
|
spec.add_dependency 'ks', '~> 0.0'
|
34
|
-
spec.add_dependency 'exifr', '~> 1', '>= 1.3.
|
35
|
-
spec.add_dependency 'id3tag', '~> 0.
|
34
|
+
spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
|
35
|
+
spec.add_dependency 'id3tag', '~> 0.14'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
37
|
spec.add_dependency 'measurometer', '~> 1'
|
38
38
|
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -20,13 +20,14 @@ class FormatParser::MP3Parser
|
|
20
20
|
|
21
21
|
# We limit the number of MPEG frames we scan
|
22
22
|
# to obtain our duration estimation
|
23
|
-
MAX_FRAMES_TO_SCAN =
|
23
|
+
MAX_FRAMES_TO_SCAN = 500
|
24
24
|
|
25
25
|
# Default frame size for mp3
|
26
26
|
SAMPLES_PER_FRAME = 1152
|
27
27
|
|
28
28
|
# For some edge cases
|
29
29
|
ZIP_LOCAL_ENTRY_SIGNATURE = "PK\x03\x04\x14\x00".b
|
30
|
+
PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
|
30
31
|
|
31
32
|
# Wraps the Tag object returned by ID3Tag in such
|
32
33
|
# a way that a usable JSON representation gets
|
@@ -60,8 +61,12 @@ class FormatParser::MP3Parser
|
|
60
61
|
# To avoid having that happen, we check for the PKZIP signature -
|
61
62
|
# local entry header signature - at the very start of the file.
|
62
63
|
# If the file is too small safe_read will fail too and the parser
|
63
|
-
# will terminate here.
|
64
|
-
|
64
|
+
# will terminate here. Same with PNGs. In the future
|
65
|
+
# we should implement "confidence" for MP3 as of all our formats
|
66
|
+
# it is by far the most lax.
|
67
|
+
header = safe_read(io, 8)
|
68
|
+
return if header.start_with?(ZIP_LOCAL_ENTRY_SIGNATURE)
|
69
|
+
return if header.start_with?(PNG_HEADER_BYTES)
|
65
70
|
|
66
71
|
# Read all the ID3 tags (or at least attempt to)
|
67
72
|
io.seek(0)
|
@@ -131,27 +136,28 @@ class FormatParser::MP3Parser
|
|
131
136
|
# if you have a minute. https://pypi.python.org/pypi/tinytag
|
132
137
|
def parse_mpeg_frames(io)
|
133
138
|
mpeg_frames = []
|
139
|
+
bytes_to_read = 4
|
134
140
|
|
135
141
|
MAX_FRAMES_TO_SCAN.times do |frame_i|
|
136
142
|
# Read through until we can latch onto the 11 sync bits. Read in 4-byte
|
137
143
|
# increments to save on read() calls
|
138
|
-
data = io.read(
|
144
|
+
data = io.read(bytes_to_read)
|
139
145
|
|
140
146
|
# If we are at EOF - stop iterating
|
141
|
-
break unless data && data.bytesize ==
|
147
|
+
break unless data && data.bytesize == bytes_to_read
|
142
148
|
|
143
149
|
# Look for the sync pattern. It can be either the last byte being 0xFF,
|
144
150
|
# or any of the 2 bytes in sequence being 0xFF and > 0xF0.
|
145
151
|
four_bytes = data.unpack('C4')
|
146
152
|
seek_jmp = sync_bytes_offset_in_4_byte_seq(four_bytes)
|
147
153
|
if seek_jmp > 0
|
148
|
-
io.seek(io.pos + seek_jmp)
|
154
|
+
io.seek(io.pos - bytes_to_read + seek_jmp)
|
149
155
|
next
|
150
156
|
end
|
151
157
|
|
152
158
|
# Once we are past that stage we have latched onto a sync frame header
|
153
159
|
sync, conf, bitrate_freq, rest = four_bytes
|
154
|
-
frame_detail = parse_mpeg_frame_header(io.pos -
|
160
|
+
frame_detail = parse_mpeg_frame_header(io.pos - bytes_to_read, sync, conf, bitrate_freq, rest)
|
155
161
|
mpeg_frames << frame_detail
|
156
162
|
|
157
163
|
# There might be a xing header in the first frame that contains
|
@@ -166,7 +172,7 @@ class FormatParser::MP3Parser
|
|
166
172
|
end
|
167
173
|
end
|
168
174
|
if frame_detail.frame_length > 1 # jump over current frame body
|
169
|
-
io.seek(io.pos + frame_detail.frame_length -
|
175
|
+
io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
|
170
176
|
end
|
171
177
|
end
|
172
178
|
[nil, mpeg_frames]
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module FormatParser::MP3Parser::ID3Extraction
|
2
2
|
ID3V1_TAG_SIZE_BYTES = 128
|
3
|
-
|
3
|
+
# it supports 2.4.x, 2.3.x, 2.2.x which are supported by the gem id3tag
|
4
|
+
# see https://id3.org/Developer%20Information for more details of each version
|
5
|
+
ID3V2_MINOR_TAG_VERSIONS = [2, 3, 4]
|
4
6
|
MAX_SIZE_FOR_ID3V2 = 1 * 1024 * 1024
|
5
7
|
|
6
8
|
extend FormatParser::IOUtils
|
@@ -22,7 +24,7 @@ module FormatParser::MP3Parser::ID3Extraction
|
|
22
24
|
io.seek(0) # Only support header ID3v2
|
23
25
|
header = parse_id3_v2_header(io)
|
24
26
|
return unless header[:tag] == 'ID3' && header[:size] > 0
|
25
|
-
return unless
|
27
|
+
return unless ID3V2_MINOR_TAG_VERSIONS.include?(header[:version].unpack('C').first)
|
26
28
|
|
27
29
|
id3_tag_size = io.pos + header[:size]
|
28
30
|
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -26,7 +26,7 @@ class FormatParser::TIFFParser
|
|
26
26
|
h = exif_data.height || exif_data.pixel_y_dimension
|
27
27
|
|
28
28
|
FormatParser::Image.new(
|
29
|
-
format: :tif,
|
29
|
+
format: arw?(exif_data) ? :arw : :tif, # Specify format as arw for Sony ARW format images, else tif
|
30
30
|
width_px: w,
|
31
31
|
height_px: h,
|
32
32
|
display_width_px: exif_data.rotated? ? h : w,
|
@@ -43,5 +43,11 @@ class FormatParser::TIFFParser
|
|
43
43
|
safe_read(io, 2) == 'CR'
|
44
44
|
end
|
45
45
|
|
46
|
+
# Similar to how exiftool determines the image type as ARW, we are implementing a check here
|
47
|
+
# https://github.com/exiftool/exiftool/blob/e969456372fbaf4b980fea8bb094d71033ac8bf7/lib/Image/ExifTool/Exif.pm#L929
|
48
|
+
def arw?(exif_data)
|
49
|
+
exif_data.compression == 6 && exif_data.new_subfile_type == 1 && exif_data.make == 'SONY'
|
50
|
+
end
|
51
|
+
|
46
52
|
FormatParser.register_parser new, natures: :image, formats: :tif
|
47
53
|
end
|
@@ -15,6 +15,12 @@ describe FormatParser::MP3Parser do
|
|
15
15
|
expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
|
16
16
|
end
|
17
17
|
|
18
|
+
it 'does not misdetect a PNG' do
|
19
|
+
fpath = fixtures_dir + '/PNG/anim.png'
|
20
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
21
|
+
expect(parsed).to be_nil
|
22
|
+
end
|
23
|
+
|
18
24
|
describe 'title/artist/album attributes' do
|
19
25
|
let(:parsed) { subject.call(File.open(fpath, 'rb')) }
|
20
26
|
|
@@ -67,7 +73,7 @@ describe FormatParser::MP3Parser do
|
|
67
73
|
|
68
74
|
large_syncsfe_size = [ID3Tag::SynchsafeInteger.encode(more_bytes_than_permitted)].pack('N')
|
69
75
|
prepped = StringIO.new(
|
70
|
-
'ID3' + "\
|
76
|
+
'ID3' + "\x03\x00".b + "\x00".b + large_syncsfe_size + gunk
|
71
77
|
)
|
72
78
|
|
73
79
|
expect(ID3Tag).not_to receive(:read)
|
@@ -90,6 +96,14 @@ describe FormatParser::MP3Parser do
|
|
90
96
|
expect(parsed.title).to eq('test')
|
91
97
|
end
|
92
98
|
|
99
|
+
it 'reads the mpeg frames correctly' do
|
100
|
+
fpath = fixtures_dir + '/MP3/test_read_frames.mp3'
|
101
|
+
|
102
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
103
|
+
|
104
|
+
expect(parsed.audio_sample_rate_hz). to eq(48000)
|
105
|
+
end
|
106
|
+
|
93
107
|
it 'parses the Cassy MP3' do
|
94
108
|
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
95
109
|
parsed = subject.call(File.open(fpath, 'rb'))
|
@@ -130,6 +144,14 @@ describe FormatParser::MP3Parser do
|
|
130
144
|
}.to raise_error(FormatParser::IOUtils::InvalidRead)
|
131
145
|
end
|
132
146
|
|
147
|
+
it 'supports id3 v2.4.x' do
|
148
|
+
fpath = fixtures_dir + '/MP3/id3v24.mp3'
|
149
|
+
|
150
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
151
|
+
|
152
|
+
expect(parsed.artist). to eq('wetransfer')
|
153
|
+
end
|
154
|
+
|
133
155
|
describe '#as_json' do
|
134
156
|
it 'converts all hash keys to string when stringify_keys: true' do
|
135
157
|
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
@@ -47,12 +47,15 @@ describe FormatParser::TIFFParser do
|
|
47
47
|
expect(parsed.intrinsics[:exif]).not_to be_nil
|
48
48
|
end
|
49
49
|
|
50
|
-
it '
|
50
|
+
it 'parses Sony ARW fixture as arw format file' do
|
51
51
|
arw_path = fixtures_dir + '/ARW/RAW_SONY_ILCE-7RM2.ARW'
|
52
52
|
|
53
53
|
parsed = subject.call(File.open(arw_path, 'rb'))
|
54
54
|
|
55
55
|
expect(parsed).not_to be_nil
|
56
|
+
expect(parsed.nature).to eq(:image)
|
57
|
+
expect(parsed.format).to eq(:arw)
|
58
|
+
|
56
59
|
expect(parsed.width_px).to eq(7952)
|
57
60
|
expect(parsed.height_px).to eq(5304)
|
58
61
|
expect(parsed.intrinsics[:exif]).not_to be_nil
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.25.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2020-09-
|
12
|
+
date: 2020-09-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
version: '1'
|
35
35
|
- - ">="
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 1.3.
|
37
|
+
version: 1.3.8
|
38
38
|
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,21 +44,21 @@ dependencies:
|
|
44
44
|
version: '1'
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.3.
|
47
|
+
version: 1.3.8
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: id3tag
|
50
50
|
requirement: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.
|
54
|
+
version: '0.14'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
57
|
version_requirements: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0.
|
61
|
+
version: '0.14'
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: faraday
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
292
292
|
- !ruby/object:Gem::Version
|
293
293
|
version: '0'
|
294
294
|
requirements: []
|
295
|
-
rubygems_version: 3.
|
295
|
+
rubygems_version: 3.1.2
|
296
296
|
signing_key:
|
297
297
|
specification_version: 4
|
298
298
|
summary: A library for efficient parsing of file metadata
|