format_parser 0.23.0 → 0.25.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/format_parser.gemspec +2 -2
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/mp3_parser.rb +14 -8
- data/lib/parsers/mp3_parser/id3_extraction.rb +4 -2
- data/lib/parsers/tiff_parser.rb +7 -1
- data/spec/parsers/mp3_parser_spec.rb +23 -1
- data/spec/parsers/tiff_parser_spec.rb +4 -1
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c24092543b6c98c713b07a45c3e3a2990332858397e11f6709470c32343a62f3
|
4
|
+
data.tar.gz: 0222b05ddfa1efa7cee364db7ad61ae7d44806b9622d6d6012fbb50e1b2e8138
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 67a3a64166115ef70b2043c05e9c105c2939752b1de34086421a2b96311880673d1c668927eecf904e06db368cc9979218d666004b59a187f52e569f47e9d2a3
|
7
|
+
data.tar.gz: 5f4034dbcc2a92cb4908c6609465fbb81ec5011f42f22e9c5477b72a26ee4b59bbaa1191a13a5405d71f68ac8b8c0ceac652befd9eb2c1ea9fc5af000b221828
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
## 0.25.0
|
2
|
+
* MP3: add support for id3 v2.4.x
|
3
|
+
* JPEG: Update gem exifr to 1.3.8 to fix a bug
|
4
|
+
|
5
|
+
## 0.24.2
|
6
|
+
* Update gem id3tag to 0.14.0 to fix MP3 issues
|
7
|
+
|
8
|
+
## 0.24.1
|
9
|
+
* Fix MP3 frames reading to jump correctly to the next bytes
|
10
|
+
|
11
|
+
## 0.24.0
|
12
|
+
* The TIFF parser will now return :arw as format for Sony ARW files instead of :tif so that the caller can decide whether it
|
13
|
+
wants to deal with RAW processing or not
|
14
|
+
|
15
|
+
## 0.23.1
|
16
|
+
* Updated gem exifr to fix problems related to JPEG files from Olympus microscopes, which often have bad thumbnail data
|
17
|
+
|
1
18
|
## 0.23.0
|
2
19
|
* Add ActiveStorage analyzer which can analyze ActiveStorage blobs. Enable it by setting
|
3
20
|
`config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer`
|
data/format_parser.gemspec
CHANGED
@@ -31,8 +31,8 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
33
|
spec.add_dependency 'ks', '~> 0.0'
|
34
|
-
spec.add_dependency 'exifr', '~> 1', '>= 1.3.
|
35
|
-
spec.add_dependency 'id3tag', '~> 0.
|
34
|
+
spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
|
35
|
+
spec.add_dependency 'id3tag', '~> 0.14'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
37
|
spec.add_dependency 'measurometer', '~> 1'
|
38
38
|
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -20,13 +20,14 @@ class FormatParser::MP3Parser
|
|
20
20
|
|
21
21
|
# We limit the number of MPEG frames we scan
|
22
22
|
# to obtain our duration estimation
|
23
|
-
MAX_FRAMES_TO_SCAN =
|
23
|
+
MAX_FRAMES_TO_SCAN = 500
|
24
24
|
|
25
25
|
# Default frame size for mp3
|
26
26
|
SAMPLES_PER_FRAME = 1152
|
27
27
|
|
28
28
|
# For some edge cases
|
29
29
|
ZIP_LOCAL_ENTRY_SIGNATURE = "PK\x03\x04\x14\x00".b
|
30
|
+
PNG_HEADER_BYTES = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
|
30
31
|
|
31
32
|
# Wraps the Tag object returned by ID3Tag in such
|
32
33
|
# a way that a usable JSON representation gets
|
@@ -60,8 +61,12 @@ class FormatParser::MP3Parser
|
|
60
61
|
# To avoid having that happen, we check for the PKZIP signature -
|
61
62
|
# local entry header signature - at the very start of the file.
|
62
63
|
# If the file is too small safe_read will fail too and the parser
|
63
|
-
# will terminate here.
|
64
|
-
|
64
|
+
# will terminate here. Same with PNGs. In the future
|
65
|
+
# we should implement "confidence" for MP3 as of all our formats
|
66
|
+
# it is by far the most lax.
|
67
|
+
header = safe_read(io, 8)
|
68
|
+
return if header.start_with?(ZIP_LOCAL_ENTRY_SIGNATURE)
|
69
|
+
return if header.start_with?(PNG_HEADER_BYTES)
|
65
70
|
|
66
71
|
# Read all the ID3 tags (or at least attempt to)
|
67
72
|
io.seek(0)
|
@@ -131,27 +136,28 @@ class FormatParser::MP3Parser
|
|
131
136
|
# if you have a minute. https://pypi.python.org/pypi/tinytag
|
132
137
|
def parse_mpeg_frames(io)
|
133
138
|
mpeg_frames = []
|
139
|
+
bytes_to_read = 4
|
134
140
|
|
135
141
|
MAX_FRAMES_TO_SCAN.times do |frame_i|
|
136
142
|
# Read through until we can latch onto the 11 sync bits. Read in 4-byte
|
137
143
|
# increments to save on read() calls
|
138
|
-
data = io.read(
|
144
|
+
data = io.read(bytes_to_read)
|
139
145
|
|
140
146
|
# If we are at EOF - stop iterating
|
141
|
-
break unless data && data.bytesize ==
|
147
|
+
break unless data && data.bytesize == bytes_to_read
|
142
148
|
|
143
149
|
# Look for the sync pattern. It can be either the last byte being 0xFF,
|
144
150
|
# or any of the 2 bytes in sequence being 0xFF and > 0xF0.
|
145
151
|
four_bytes = data.unpack('C4')
|
146
152
|
seek_jmp = sync_bytes_offset_in_4_byte_seq(four_bytes)
|
147
153
|
if seek_jmp > 0
|
148
|
-
io.seek(io.pos + seek_jmp)
|
154
|
+
io.seek(io.pos - bytes_to_read + seek_jmp)
|
149
155
|
next
|
150
156
|
end
|
151
157
|
|
152
158
|
# Once we are past that stage we have latched onto a sync frame header
|
153
159
|
sync, conf, bitrate_freq, rest = four_bytes
|
154
|
-
frame_detail = parse_mpeg_frame_header(io.pos -
|
160
|
+
frame_detail = parse_mpeg_frame_header(io.pos - bytes_to_read, sync, conf, bitrate_freq, rest)
|
155
161
|
mpeg_frames << frame_detail
|
156
162
|
|
157
163
|
# There might be a xing header in the first frame that contains
|
@@ -166,7 +172,7 @@ class FormatParser::MP3Parser
|
|
166
172
|
end
|
167
173
|
end
|
168
174
|
if frame_detail.frame_length > 1 # jump over current frame body
|
169
|
-
io.seek(io.pos + frame_detail.frame_length -
|
175
|
+
io.seek(io.pos + frame_detail.frame_length - bytes_to_read)
|
170
176
|
end
|
171
177
|
end
|
172
178
|
[nil, mpeg_frames]
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module FormatParser::MP3Parser::ID3Extraction
|
2
2
|
ID3V1_TAG_SIZE_BYTES = 128
|
3
|
-
|
3
|
+
# it supports 2.4.x, 2.3.x, 2.2.x which are supported by the gem id3tag
|
4
|
+
# see https://id3.org/Developer%20Information for more details of each version
|
5
|
+
ID3V2_MINOR_TAG_VERSIONS = [2, 3, 4]
|
4
6
|
MAX_SIZE_FOR_ID3V2 = 1 * 1024 * 1024
|
5
7
|
|
6
8
|
extend FormatParser::IOUtils
|
@@ -22,7 +24,7 @@ module FormatParser::MP3Parser::ID3Extraction
|
|
22
24
|
io.seek(0) # Only support header ID3v2
|
23
25
|
header = parse_id3_v2_header(io)
|
24
26
|
return unless header[:tag] == 'ID3' && header[:size] > 0
|
25
|
-
return unless
|
27
|
+
return unless ID3V2_MINOR_TAG_VERSIONS.include?(header[:version].unpack('C').first)
|
26
28
|
|
27
29
|
id3_tag_size = io.pos + header[:size]
|
28
30
|
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -26,7 +26,7 @@ class FormatParser::TIFFParser
|
|
26
26
|
h = exif_data.height || exif_data.pixel_y_dimension
|
27
27
|
|
28
28
|
FormatParser::Image.new(
|
29
|
-
format: :tif,
|
29
|
+
format: arw?(exif_data) ? :arw : :tif, # Specify format as arw for Sony ARW format images, else tif
|
30
30
|
width_px: w,
|
31
31
|
height_px: h,
|
32
32
|
display_width_px: exif_data.rotated? ? h : w,
|
@@ -43,5 +43,11 @@ class FormatParser::TIFFParser
|
|
43
43
|
safe_read(io, 2) == 'CR'
|
44
44
|
end
|
45
45
|
|
46
|
+
# Similar to how exiftool determines the image type as ARW, we are implementing a check here
|
47
|
+
# https://github.com/exiftool/exiftool/blob/e969456372fbaf4b980fea8bb094d71033ac8bf7/lib/Image/ExifTool/Exif.pm#L929
|
48
|
+
def arw?(exif_data)
|
49
|
+
exif_data.compression == 6 && exif_data.new_subfile_type == 1 && exif_data.make == 'SONY'
|
50
|
+
end
|
51
|
+
|
46
52
|
FormatParser.register_parser new, natures: :image, formats: :tif
|
47
53
|
end
|
@@ -15,6 +15,12 @@ describe FormatParser::MP3Parser do
|
|
15
15
|
expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
|
16
16
|
end
|
17
17
|
|
18
|
+
it 'does not misdetect a PNG' do
|
19
|
+
fpath = fixtures_dir + '/PNG/anim.png'
|
20
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
21
|
+
expect(parsed).to be_nil
|
22
|
+
end
|
23
|
+
|
18
24
|
describe 'title/artist/album attributes' do
|
19
25
|
let(:parsed) { subject.call(File.open(fpath, 'rb')) }
|
20
26
|
|
@@ -67,7 +73,7 @@ describe FormatParser::MP3Parser do
|
|
67
73
|
|
68
74
|
large_syncsfe_size = [ID3Tag::SynchsafeInteger.encode(more_bytes_than_permitted)].pack('N')
|
69
75
|
prepped = StringIO.new(
|
70
|
-
'ID3' + "\
|
76
|
+
'ID3' + "\x03\x00".b + "\x00".b + large_syncsfe_size + gunk
|
71
77
|
)
|
72
78
|
|
73
79
|
expect(ID3Tag).not_to receive(:read)
|
@@ -90,6 +96,14 @@ describe FormatParser::MP3Parser do
|
|
90
96
|
expect(parsed.title).to eq('test')
|
91
97
|
end
|
92
98
|
|
99
|
+
it 'reads the mpeg frames correctly' do
|
100
|
+
fpath = fixtures_dir + '/MP3/test_read_frames.mp3'
|
101
|
+
|
102
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
103
|
+
|
104
|
+
expect(parsed.audio_sample_rate_hz). to eq(48000)
|
105
|
+
end
|
106
|
+
|
93
107
|
it 'parses the Cassy MP3' do
|
94
108
|
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
95
109
|
parsed = subject.call(File.open(fpath, 'rb'))
|
@@ -130,6 +144,14 @@ describe FormatParser::MP3Parser do
|
|
130
144
|
}.to raise_error(FormatParser::IOUtils::InvalidRead)
|
131
145
|
end
|
132
146
|
|
147
|
+
it 'supports id3 v2.4.x' do
|
148
|
+
fpath = fixtures_dir + '/MP3/id3v24.mp3'
|
149
|
+
|
150
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
151
|
+
|
152
|
+
expect(parsed.artist). to eq('wetransfer')
|
153
|
+
end
|
154
|
+
|
133
155
|
describe '#as_json' do
|
134
156
|
it 'converts all hash keys to string when stringify_keys: true' do
|
135
157
|
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
@@ -47,12 +47,15 @@ describe FormatParser::TIFFParser do
|
|
47
47
|
expect(parsed.intrinsics[:exif]).not_to be_nil
|
48
48
|
end
|
49
49
|
|
50
|
-
it '
|
50
|
+
it 'parses Sony ARW fixture as arw format file' do
|
51
51
|
arw_path = fixtures_dir + '/ARW/RAW_SONY_ILCE-7RM2.ARW'
|
52
52
|
|
53
53
|
parsed = subject.call(File.open(arw_path, 'rb'))
|
54
54
|
|
55
55
|
expect(parsed).not_to be_nil
|
56
|
+
expect(parsed.nature).to eq(:image)
|
57
|
+
expect(parsed.format).to eq(:arw)
|
58
|
+
|
56
59
|
expect(parsed.width_px).to eq(7952)
|
57
60
|
expect(parsed.height_px).to eq(5304)
|
58
61
|
expect(parsed.intrinsics[:exif]).not_to be_nil
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.25.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2020-09-
|
12
|
+
date: 2020-09-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
version: '1'
|
35
35
|
- - ">="
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 1.3.
|
37
|
+
version: 1.3.8
|
38
38
|
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,21 +44,21 @@ dependencies:
|
|
44
44
|
version: '1'
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.3.
|
47
|
+
version: 1.3.8
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: id3tag
|
50
50
|
requirement: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.
|
54
|
+
version: '0.14'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
57
|
version_requirements: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0.
|
61
|
+
version: '0.14'
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: faraday
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -292,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
292
292
|
- !ruby/object:Gem::Version
|
293
293
|
version: '0'
|
294
294
|
requirements: []
|
295
|
-
rubygems_version: 3.
|
295
|
+
rubygems_version: 3.1.2
|
296
296
|
signing_key:
|
297
297
|
specification_version: 4
|
298
298
|
summary: A library for efficient parsing of file metadata
|