format_parser 0.21.0 → 0.23.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +21 -0
- data/CONTRIBUTING.md +6 -1
- data/README.md +12 -1
- data/format_parser.gemspec +2 -3
- data/lib/active_storage/blob_analyzer.rb +35 -0
- data/lib/active_storage/blob_io.rb +51 -0
- data/lib/attributes_json.rb +9 -1
- data/lib/format_parser.rb +2 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/hash_utils.rb +19 -0
- data/lib/parsers/mp3_parser.rb +11 -6
- data/lib/parsers/mpeg_parser.rb +13 -17
- data/lib/parsers/zip_parser/file_reader.rb +3 -0
- data/spec/active_storage/blob_io_spec.rb +36 -0
- data/spec/active_storage/rails_app_spec.rb +58 -0
- data/spec/attributes_json_spec.rb +26 -0
- data/spec/hash_utils_spec.rb +42 -0
- data/spec/integration/active_storage/rails_app.rb +72 -0
- data/spec/parsers/mp3_parser_spec.rb +47 -0
- data/spec/parsers/mpeg_parser_spec.rb +21 -0
- data/spec/parsers/zip_parser_spec.rb +7 -0
- data/spec/spec_helper.rb +2 -1
- metadata +13 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f76db376646606abe6a7ccaa6f0a14efdc997ecd7fe29aff0ab3d8172857649f
|
4
|
+
data.tar.gz: 07c142e7ce6aaa518285d425eb95961d18e053a6b15456d4cb569cdc70a79069
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e92625dbe822d3423a084174fd7bc9d4a296effe26182a91743b36995f8470edc4fa56a3a0a10ce095d4998da5dee9620b983c1f980e0418a3b4574244e848c
|
7
|
+
data.tar.gz: b89df24f7b6638b3cd42b284a1792475a094e947bd995e5ccdd0c554014075f5d167a7c04fbc08c681483b0c82aa2e59ae4e4af62f99d6b158fb4b3f1096d80b
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,24 @@
|
|
1
|
+
## 0.23.1
|
2
|
+
* Updated gem exifr to fix problems related to JPEG files from Olympus microscopes, which often have bad thumbnail data
|
3
|
+
|
4
|
+
## 0.23.0
|
5
|
+
* Add ActiveStorage analyzer which can analyze ActiveStorage blobs. Enable it by setting
|
6
|
+
`config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer`
|
7
|
+
* Ignore empty ID3 tags and do not allow them to overwrite others
|
8
|
+
* Update the id3tag dependency so that we can fall back to UTF-8 instead of raising an error when parsing
|
9
|
+
MP3 files
|
10
|
+
|
11
|
+
## 0.22.1
|
12
|
+
* Fix ZIP parser to not raise an error for invalid ZIP files that have an invalid central directory
|
13
|
+
|
14
|
+
## 0.22.0
|
15
|
+
* Adds option `stringify_keys: true` to #as_json methods (fix #151)
|
16
|
+
|
17
|
+
## 0.21.1
|
18
|
+
* MPEG: Ensure parsing does not inadvertently return an Integer instead of Result|nil
|
19
|
+
* MPEG: Scan further into the MPEG file than previously (scan 32 1KB chunks)
|
20
|
+
* MPEG: Ensure the parser does not raise an exception when there is no data to read for scanning beyond the initial header
|
21
|
+
|
1
22
|
## 0.21.0
|
2
23
|
* Adds support for MPEG video files
|
3
24
|
|
data/CONTRIBUTING.md
CHANGED
@@ -234,4 +234,9 @@ This provision also applies to the test files you include with the changed code
|
|
234
234
|
|
235
235
|
## Changelog
|
236
236
|
|
237
|
-
When creating a new release you must add an entry in the `CHANGELOG.md`.
|
237
|
+
When creating a new release you must add an entry in the `CHANGELOG.md`.
|
238
|
+
|
239
|
+
## Testing locally
|
240
|
+
|
241
|
+
It's possible to run `exe/format_parser_inspect FILE_NAME` or `exe/format_parser_inspect FILE_URI`
|
242
|
+
to test the new code without the necessity of installing the gem.
|
data/README.md
CHANGED
@@ -75,6 +75,17 @@ img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
|
75
75
|
JSON.pretty_generate(img_info) #=> ...
|
76
76
|
```
|
77
77
|
|
78
|
+
To convert the result to a Hash or a structure suitable for JSON serialization
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
82
|
+
img_info.as_json
|
83
|
+
|
84
|
+
# it's also possible to convert all keys to string
|
85
|
+
img_info.as_json(stringify_keys: true)
|
86
|
+
```
|
87
|
+
|
88
|
+
|
78
89
|
## Creating your own parsers
|
79
90
|
|
80
91
|
See the [section on writing parsers in CONTRIBUTING.md](CONTRIBUTING.md#so-you-want-to-contribute-a-new-parser)
|
@@ -188,7 +199,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
188
199
|
|
189
200
|
## Copyright
|
190
201
|
|
191
|
-
Copyright (c)
|
202
|
+
Copyright (c) 2020 WeTransfer.
|
192
203
|
|
193
204
|
`format_parser` is distributed under the conditions of the [Hippocratic License](https://firstdonoharm.dev/version/1/2/license.html)
|
194
205
|
- See LICENSE.txt for further details.
|
data/format_parser.gemspec
CHANGED
@@ -31,15 +31,14 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
33
|
spec.add_dependency 'ks', '~> 0.0'
|
34
|
-
spec.add_dependency 'exifr', '~> 1', '>= 1.3.
|
35
|
-
spec.add_dependency 'id3tag', '~> 0.
|
34
|
+
spec.add_dependency 'exifr', '~> 1', '>= 1.3.7'
|
35
|
+
spec.add_dependency 'id3tag', '~> 0.13'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
37
|
spec.add_dependency 'measurometer', '~> 1'
|
38
38
|
|
39
39
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
40
40
|
spec.add_development_dependency 'rake', '~> 12'
|
41
41
|
spec.add_development_dependency 'simplecov', '~> 0.15'
|
42
|
-
spec.add_development_dependency 'pry', '~> 0.11'
|
43
42
|
spec.add_development_dependency 'yard', '~> 0.9'
|
44
43
|
spec.add_development_dependency 'wetransfer_style', '0.5.0'
|
45
44
|
spec.add_development_dependency 'parallel_tests'
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative 'blob_io'
|
2
|
+
|
3
|
+
# An analyzer class that can be hooked to ActiveStorage, in order to enable
|
4
|
+
# FormatParser to do the blob analysis instead of ActiveStorage built-in analyzers.
|
5
|
+
# Invoked if properly integrated in Rails initializer.
|
6
|
+
|
7
|
+
module FormatParser
|
8
|
+
module ActiveStorage
|
9
|
+
class BlobAnalyzer
|
10
|
+
# FormatParser is able to handle a lot of formats, so by default it accepts all files
|
11
|
+
#
|
12
|
+
# @return [Boolean] always returns true
|
13
|
+
def self.accept?(_blob)
|
14
|
+
true
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(blob)
|
18
|
+
@blob = blob
|
19
|
+
end
|
20
|
+
|
21
|
+
# @return [Hash] file metadata
|
22
|
+
def metadata
|
23
|
+
io = BlobIO.new(@blob)
|
24
|
+
parsed_file = FormatParser.parse(io)
|
25
|
+
|
26
|
+
if parsed_file
|
27
|
+
# We symbolize keys because of existing output hash format of ImageAnalyzer
|
28
|
+
parsed_file.as_json.symbolize_keys
|
29
|
+
else
|
30
|
+
logger.info "Skipping file analysis because FormatParser doesn't support the file"
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Acts as a proxy to turn ActiveStorage file into IO object
|
2
|
+
|
3
|
+
module FormatParser
|
4
|
+
module ActiveStorage
|
5
|
+
class BlobIO
|
6
|
+
# @param blob[ActiveStorage::Blob] the file with linked service
|
7
|
+
# @return [BlobIO]
|
8
|
+
def initialize(blob)
|
9
|
+
@blob = blob
|
10
|
+
@service = blob.service
|
11
|
+
@pos = 0
|
12
|
+
end
|
13
|
+
|
14
|
+
# Emulates IO#read, but requires the number of bytes to read.
|
15
|
+
# Rely on `ActiveStorage::Service.download_chunk` of each hosting type (local, S3, Azure, etc)
|
16
|
+
#
|
17
|
+
# @param n_bytes[Integer] how many bytes to read
|
18
|
+
# @return [String] the read bytes
|
19
|
+
def read(n_bytes)
|
20
|
+
# HTTP byte ranges are inclusive on both ends, hence the `- 1` on the end offset.
|
21
|
+
http_range = (@pos..(@pos + n_bytes - 1))
|
22
|
+
body = @service.download_chunk(@blob.key, http_range)
|
23
|
+
@pos += body.bytesize
|
24
|
+
body.force_encoding(Encoding::ASCII_8BIT)
|
25
|
+
end
|
26
|
+
|
27
|
+
# Emulates IO#seek
|
28
|
+
#
|
29
|
+
# @param [Integer] offset size
|
30
|
+
# @return [Integer] always returns 0; `seek` only mutates the `pos` attribute
|
31
|
+
def seek(offset)
|
32
|
+
@pos = offset
|
33
|
+
0
|
34
|
+
end
|
35
|
+
|
36
|
+
# Emulates IO#size.
|
37
|
+
#
|
38
|
+
# @return [Integer] the byte size of the blob as reported by ActiveStorage
|
39
|
+
def size
|
40
|
+
@blob.byte_size
|
41
|
+
end
|
42
|
+
|
43
|
+
# Emulates IO#pos
|
44
|
+
#
|
45
|
+
# @return [Integer] the current offset (in bytes) of the io
|
46
|
+
def pos
|
47
|
+
@pos
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
data/lib/attributes_json.rb
CHANGED
@@ -15,7 +15,12 @@ module FormatParser::AttributesJSON
|
|
15
15
|
|
16
16
|
# Implements a sane default `as_json` for an object
|
17
17
|
# that has accessors defined
|
18
|
-
|
18
|
+
#
|
19
|
+
# @param root[Bool] if true, it surrounds the result in a hash with a key
|
20
|
+
# `format_parser_file_info`
|
21
|
+
# @param stringify_keys[Bool] if true, it transforms all the hash keys to a string.
|
22
|
+
# The default value is false for backward compatibility
|
23
|
+
def as_json(root: false, stringify_keys: false, **)
|
19
24
|
h = {}
|
20
25
|
h['nature'] = nature if respond_to?(:nature) # Needed for file info structs
|
21
26
|
methods.grep(/\w\=$/).each_with_object(h) do |attr_writer_method_name, h|
|
@@ -27,6 +32,9 @@ module FormatParser::AttributesJSON
|
|
27
32
|
sanitized_value = _sanitize_json_value(unwrapped_attribute_value)
|
28
33
|
h[reader_method_name] = sanitized_value
|
29
34
|
end
|
35
|
+
|
36
|
+
h = FormatParser::HashUtils.deep_transform_keys(h, &:to_s) if stringify_keys
|
37
|
+
|
30
38
|
if root
|
31
39
|
{'format_parser_file_info' => h}
|
32
40
|
else
|
data/lib/format_parser.rb
CHANGED
@@ -5,6 +5,7 @@ require 'measurometer'
|
|
5
5
|
# top-level methods of the library.
|
6
6
|
module FormatParser
|
7
7
|
require_relative 'format_parser/version'
|
8
|
+
require_relative 'hash_utils'
|
8
9
|
require_relative 'attributes_json'
|
9
10
|
require_relative 'image'
|
10
11
|
require_relative 'audio'
|
@@ -17,6 +18,7 @@ module FormatParser
|
|
17
18
|
require_relative 'remote_io'
|
18
19
|
require_relative 'io_constraint'
|
19
20
|
require_relative 'care'
|
21
|
+
require_relative 'active_storage/blob_analyzer'
|
20
22
|
|
21
23
|
# Define Measurometer in the internal namespace as well
|
22
24
|
# so that we stay compatible for the applications that use it
|
data/lib/hash_utils.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# based on https://github.com/rails/rails/blob/master/activesupport/lib/active_support/core_ext/hash/keys.rb#L116
|
2
|
+
# I chose to copy this method instead of adding activesupport as a dependency
|
3
|
+
# because we want to have the least number of dependencies
|
4
|
+
module FormatParser
|
5
|
+
class HashUtils
|
6
|
+
def self.deep_transform_keys(object, &block)
|
7
|
+
case object
|
8
|
+
when Hash
|
9
|
+
object.each_with_object({}) do |(key, value), result|
|
10
|
+
result[yield(key)] = deep_transform_keys(value, &block)
|
11
|
+
end
|
12
|
+
when Array
|
13
|
+
object.map { |e| deep_transform_keys(e, &block) }
|
14
|
+
else
|
15
|
+
object
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -44,13 +44,9 @@ class FormatParser::MP3Parser
|
|
44
44
|
tag = __getobj__
|
45
45
|
MEMBERS.each_with_object({}) do |k, h|
|
46
46
|
value = tag.public_send(k)
|
47
|
-
h[k] = value if value
|
47
|
+
h[k] = value if value && !value.empty?
|
48
48
|
end
|
49
49
|
end
|
50
|
-
|
51
|
-
def as_json(*)
|
52
|
-
to_h
|
53
|
-
end
|
54
50
|
end
|
55
51
|
|
56
52
|
def likely_match?(filename)
|
@@ -85,7 +81,7 @@ class FormatParser::MP3Parser
|
|
85
81
|
|
86
82
|
first_frame = initial_frames.first
|
87
83
|
|
88
|
-
id3tags_hash = blend_id3_tags_into_hash(*tags)
|
84
|
+
id3tags_hash = with_id3tag_local_configs { blend_id3_tags_into_hash(*tags) }
|
89
85
|
|
90
86
|
file_info = FormatParser::Audio.new(
|
91
87
|
format: :mp3,
|
@@ -297,5 +293,14 @@ class FormatParser::MP3Parser
|
|
297
293
|
attrs
|
298
294
|
end
|
299
295
|
|
296
|
+
def with_id3tag_local_configs
|
297
|
+
ID3Tag.local_configuration do |c|
|
298
|
+
c.string_encode_options = { invalid: :replace, undef: :replace }
|
299
|
+
c.source_encoding_fallback = Encoding::UTF_8
|
300
|
+
|
301
|
+
yield
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
300
305
|
FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 99
|
301
306
|
end
|
data/lib/parsers/mpeg_parser.rb
CHANGED
@@ -26,9 +26,8 @@ class FormatParser::MPEGParser
|
|
26
26
|
|
27
27
|
PACK_HEADER_START_CODE = [0x00, 0x00, 0x01, 0xBA].pack('C*')
|
28
28
|
SEQUENCE_HEADER_START_CODE = [0xB3].pack('C*')
|
29
|
-
|
30
|
-
|
31
|
-
BYTES_TO_READ_PER_TIME = 1024
|
29
|
+
MAX_BLOCK_READS = 32
|
30
|
+
BYTES_TO_READ_PER_READ = 1024
|
32
31
|
|
33
32
|
def self.likely_match?(filename)
|
34
33
|
filename =~ /\.(mpg|mpeg)$/i
|
@@ -37,18 +36,19 @@ class FormatParser::MPEGParser
|
|
37
36
|
def self.call(io)
|
38
37
|
return unless matches_mpeg_header?(io)
|
39
38
|
|
40
|
-
# We are looping though the stream because there can be several sequence headers and some of them are not
|
41
|
-
# If we detect that the header is not
|
39
|
+
# We are looping through the stream because there can be several sequence headers and some of them are not useful.
|
40
|
+
# If we detect that the header is not useful, then we look for the next one, reading at most MAX_BLOCK_READS blocks in total.
|
42
41
|
# If we reach the EOF, then the mpg is likely to be corrupted and we return nil
|
43
|
-
|
44
|
-
|
42
|
+
MAX_BLOCK_READS.times do
|
43
|
+
next unless pos = find_next_header_code_pos(io)
|
44
|
+
io.seek(pos + 1)
|
45
45
|
horizontal_size, vertical_size = parse_image_size(io)
|
46
46
|
ratio_code, rate_code = parse_rate_information(io)
|
47
|
-
|
48
47
|
if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
|
49
48
|
return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
|
50
49
|
end
|
51
50
|
end
|
51
|
+
nil # otherwise the return value of Integer#times will be returned
|
52
52
|
rescue FormatParser::IOUtils::InvalidRead
|
53
53
|
nil
|
54
54
|
end
|
@@ -90,15 +90,11 @@ class FormatParser::MPEGParser
|
|
90
90
|
# Returns the position of the next sequence package content in the stream
|
91
91
|
# This method reads BYTES_TO_READ_PER_READ bytes starting at the current position and searches them for the sequence header start code.
|
92
92
|
# If the package is not found, then it returns nil.
|
93
|
-
def self.
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
new_io_pos = io.pos - BYTES_TO_READ_PER_TIME + header_relative_index + 1
|
99
|
-
io.seek(new_io_pos)
|
100
|
-
return new_io_pos
|
101
|
-
end
|
93
|
+
def self.find_next_header_code_pos(io)
|
94
|
+
pos_before_read = io.pos
|
95
|
+
bin_str = io.read(BYTES_TO_READ_PER_READ) # bin_str might be nil if we are at EOF
|
96
|
+
header_relative_index = bin_str && bin_str.index(SEQUENCE_HEADER_START_CODE)
|
97
|
+
return pos_before_read + header_relative_index if header_relative_index
|
102
98
|
end
|
103
99
|
|
104
100
|
# If the first 4 bytes of the stream are equal to 00 00 01 BA, the pack start code for the Pack Header, then it's an MPEG file.
|
@@ -18,6 +18,7 @@ class FormatParser::ZIPParser::FileReader
|
|
18
18
|
'Could not find the EOCD signature in the buffer - maybe a malformed ZIP file'
|
19
19
|
end
|
20
20
|
end
|
21
|
+
InvalidCentralDirectory = Class.new(Error)
|
21
22
|
|
22
23
|
C_UINT32LE = 'V'
|
23
24
|
C_UINT16LE = 'v'
|
@@ -175,6 +176,8 @@ class FormatParser::ZIPParser::FileReader
|
|
175
176
|
# BUT! in format_parser we avoid unbounded reads, as a matter of fact they are forbidden.
|
176
177
|
# So we will again limit ourselves to cdir_size, and we will take a cushion of 1 KB.
|
177
178
|
central_directory_str = io.read(cdir_size + 1024)
|
179
|
+
raise InvalidCentralDirectory if central_directory_str.nil?
|
180
|
+
|
178
181
|
central_directory_io = StringIO.new(central_directory_str)
|
179
182
|
log do
|
180
183
|
format(
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::ActiveStorage::BlobIO do
|
4
|
+
let(:blob_service) { double }
|
5
|
+
let(:blob) { double(key: 'blob_key', service: blob_service, byte_size: 43000) }
|
6
|
+
let(:io) { described_class.new(blob) }
|
7
|
+
let(:fixture_path) { fixtures_dir + '/test.png' }
|
8
|
+
|
9
|
+
it_behaves_like 'an IO object compatible with IOConstraint'
|
10
|
+
|
11
|
+
describe '#read' do
|
12
|
+
it 'reads io using download_chunk from ActiveStorage#Service' do
|
13
|
+
allow(blob_service).to receive(:download_chunk) { 'a' }
|
14
|
+
|
15
|
+
expect(io.read(1)).to eq('a')
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'updates #pos on read' do
|
19
|
+
allow(blob_service).to receive(:download_chunk) { 'a' }
|
20
|
+
|
21
|
+
expect { io.read(1) }.to change { io.pos }.from(0).to(1)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe '#seek' do
|
26
|
+
it 'updates @pos' do
|
27
|
+
expect { io.seek(10) }.to change { io.pos }.from(0).to(10)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe '#size' do
|
32
|
+
it 'returns the size of the blob byte_size' do
|
33
|
+
expect(io.size).to eq(blob.byte_size)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
def skip_reason
|
4
|
+
if RUBY_ENGINE == 'jruby'
|
5
|
+
'Skipping because JRuby have randon failing issue'
|
6
|
+
elsif RUBY_VERSION.to_f < 2.5
|
7
|
+
'Skipping because Rails testing script use Rails 6, who does not support Ruby bellow 2.5'
|
8
|
+
else
|
9
|
+
false
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe 'Rails app with ActiveStorage and format-parser', skip: skip_reason do
|
14
|
+
describe 'local hosting with ActiveStorage disk adapter' do
|
15
|
+
it 'parse local file with format_parser' do
|
16
|
+
clean_env do
|
17
|
+
cmd = 'ruby spec/integration/active_storage/rails_app.rb'
|
18
|
+
cmd_status = ruby_script_runner(cmd)
|
19
|
+
expect(cmd_status[:stdout].last).to match(/1 runs, 3 assertions, 0 failures, 0 errors, 0 skips/)
|
20
|
+
expect(cmd_status[:exitstatus]).to eq(0)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def ruby_script_runner(cmd)
|
26
|
+
require 'open3'
|
27
|
+
cmd_status = { stdout: [], exitstatus: nil }
|
28
|
+
Open3.popen2(cmd) do |_stdin, stdout, wait_thr|
|
29
|
+
frame_stdout do
|
30
|
+
while line = stdout.gets
|
31
|
+
puts "| #{line}"
|
32
|
+
cmd_status[:stdout] << line
|
33
|
+
end
|
34
|
+
end
|
35
|
+
cmd_status[:exitstatus] = wait_thr.value.exitstatus
|
36
|
+
end
|
37
|
+
cmd_status
|
38
|
+
end
|
39
|
+
|
40
|
+
def frame_stdout
|
41
|
+
puts
|
42
|
+
puts '-' * 50
|
43
|
+
yield
|
44
|
+
puts '-' * 50
|
45
|
+
end
|
46
|
+
|
47
|
+
def clean_env
|
48
|
+
if Bundler.respond_to?(:with_unbundled_env)
|
49
|
+
Bundler.with_unbundled_env do
|
50
|
+
yield
|
51
|
+
end
|
52
|
+
else
|
53
|
+
Bundler.with_clean_env do
|
54
|
+
yield
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -140,4 +140,30 @@ describe FormatParser::AttributesJSON do
|
|
140
140
|
JSON.pretty_generate(object_with_attributes_module)
|
141
141
|
}.to raise_error(/structure too deep/)
|
142
142
|
end
|
143
|
+
|
144
|
+
it 'converts all hash keys to string when stringify_keys: true' do
|
145
|
+
fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
|
146
|
+
fi_io = File.open(fixture_path, 'rb')
|
147
|
+
|
148
|
+
result = FormatParser::ZIPParser.new.call(fi_io).as_json(stringify_keys: true)
|
149
|
+
|
150
|
+
result['entries'].each do |entry|
|
151
|
+
entry.each do |key, _value|
|
152
|
+
expect(key).to be_a(String)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
it 'does not convert hash keys to string when stringify_keys: false' do
|
158
|
+
fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
|
159
|
+
fi_io = File.open(fixture_path, 'rb')
|
160
|
+
|
161
|
+
result = FormatParser::ZIPParser.new.call(fi_io).as_json
|
162
|
+
|
163
|
+
result['entries'].each do |entry|
|
164
|
+
entry.each do |key, _value|
|
165
|
+
expect(key).to be_a(Symbol)
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
143
169
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::HashUtils do
|
4
|
+
describe '.deep_transform_keys' do
|
5
|
+
it 'transforms all the keys in a hash' do
|
6
|
+
hash = { aa: 1, 'bb' => 2 }
|
7
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
8
|
+
|
9
|
+
expect(result).to eq('aa' => 1, 'bb' => 2)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'transforms all the keys in a array of hashes' do
|
13
|
+
array = [{ aa: 1, bb: 2 }, { cc: 3, dd: [{c: 2, d: 3}] }]
|
14
|
+
result = described_class.deep_transform_keys(array, &:to_s)
|
15
|
+
|
16
|
+
expect(result).to eq(
|
17
|
+
[{'aa' => 1, 'bb' => 2}, {'cc' => 3, 'dd' => [{'c' => 2, 'd' => 3}]}]
|
18
|
+
)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'transforms all the keys in a hash recursively' do
|
22
|
+
hash = { aa: 1, bb: { cc: 22, dd: 3 } }
|
23
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
24
|
+
|
25
|
+
expect(result).to eq('aa' => 1, 'bb' => { 'cc' => 22, 'dd' => 3})
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'does nothing for an non array/hash object' do
|
29
|
+
object = Object.new
|
30
|
+
result = described_class.deep_transform_keys(object, &:to_s)
|
31
|
+
|
32
|
+
expect(result).to eq(object)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'returns the last value if different keys are transformed into the same one' do
|
36
|
+
hash = { aa: 0, 'bb' => 2, bb: 1 }
|
37
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
38
|
+
|
39
|
+
expect(result).to eq('aa' => 0, 'bb' => 1)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'bundler/inline'
|
2
|
+
|
3
|
+
gemfile(true) do
|
4
|
+
source 'https://rubygems.org'
|
5
|
+
|
6
|
+
git_source(:github) { |repo| "https://github.com/#{repo}.git" }
|
7
|
+
|
8
|
+
gem 'rails', '6.0.3'
|
9
|
+
gem 'sqlite3'
|
10
|
+
gem 'format_parser', path: './'
|
11
|
+
end
|
12
|
+
|
13
|
+
require 'active_record/railtie'
|
14
|
+
require 'active_storage/engine'
|
15
|
+
require 'tmpdir'
|
16
|
+
|
17
|
+
class TestApp < Rails::Application
|
18
|
+
config.root = __dir__
|
19
|
+
config.hosts << 'example.org'
|
20
|
+
config.eager_load = false
|
21
|
+
config.session_store :cookie_store, key: 'cookie_store_key'
|
22
|
+
secrets.secret_key_base = 'secret_key_base'
|
23
|
+
|
24
|
+
config.logger = Logger.new('/dev/null')
|
25
|
+
|
26
|
+
config.active_storage.service = :local
|
27
|
+
config.active_storage.service_configurations = {
|
28
|
+
local: {
|
29
|
+
root: Dir.tmpdir,
|
30
|
+
service: 'Disk'
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer
|
35
|
+
end
|
36
|
+
|
37
|
+
ENV['DATABASE_URL'] = 'sqlite3::memory:'
|
38
|
+
|
39
|
+
Rails.application.initialize!
|
40
|
+
|
41
|
+
require ActiveStorage::Engine.root.join('db/migrate/20170806125915_create_active_storage_tables.rb').to_s
|
42
|
+
|
43
|
+
ActiveRecord::Schema.define do
|
44
|
+
CreateActiveStorageTables.new.change
|
45
|
+
|
46
|
+
create_table :users, force: true
|
47
|
+
end
|
48
|
+
|
49
|
+
class User < ActiveRecord::Base
|
50
|
+
has_one_attached :profile_picture
|
51
|
+
end
|
52
|
+
|
53
|
+
require 'minitest/autorun'
|
54
|
+
require 'open-uri'
|
55
|
+
|
56
|
+
describe User do
|
57
|
+
describe "profile_picture's metadatas" do
|
58
|
+
it 'parse metadatas with format_parser' do
|
59
|
+
user = User.create
|
60
|
+
user.profile_picture.attach(
|
61
|
+
filename: 'cat.png',
|
62
|
+
io: URI.open('https://freesvg.org/img/1416155153.png')
|
63
|
+
)
|
64
|
+
|
65
|
+
user.profile_picture.analyze
|
66
|
+
|
67
|
+
_(user.profile_picture.metadata[:width_px]).must_equal 500
|
68
|
+
_(user.profile_picture.metadata[:height_px]).must_equal 296
|
69
|
+
_(user.profile_picture.metadata[:color_mode]).must_equal 'rgba'
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -37,6 +37,14 @@ describe FormatParser::MP3Parser do
|
|
37
37
|
expect(parsed.album).to be_nil
|
38
38
|
end
|
39
39
|
end
|
40
|
+
|
41
|
+
context 'when has an empty tag' do
|
42
|
+
let(:fpath) { fixtures_dir + '/MP3/id3v2_with_empty_tag.mp3' }
|
43
|
+
|
44
|
+
it 'ignores the empty tags' do
|
45
|
+
expect(parsed.intrinsics[:genre]).to eq('Rock')
|
46
|
+
end
|
47
|
+
end
|
40
48
|
end
|
41
49
|
|
42
50
|
it 'decodes and estimates duration for a CBR MP3' do
|
@@ -71,6 +79,17 @@ describe FormatParser::MP3Parser do
|
|
71
79
|
expect(prepped.pos).to eq(3145738)
|
72
80
|
end
|
73
81
|
|
82
|
+
it 'does not raise error when a tag frame has unsupported encoding' do
|
83
|
+
fpath = fixtures_dir + '/MP3/id3v2_frame_with_invalid_encoding.mp3'
|
84
|
+
|
85
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
86
|
+
|
87
|
+
expect(parsed.nature). to eq(:audio)
|
88
|
+
expect(parsed.album).to eq('wetransfer')
|
89
|
+
expect(parsed.artist).to eq('wetransfer')
|
90
|
+
expect(parsed.title).to eq('test')
|
91
|
+
end
|
92
|
+
|
74
93
|
it 'parses the Cassy MP3' do
|
75
94
|
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
76
95
|
parsed = subject.call(File.open(fpath, 'rb'))
|
@@ -110,4 +129,32 @@ describe FormatParser::MP3Parser do
|
|
110
129
|
subject.call(StringIO.new(''))
|
111
130
|
}.to raise_error(FormatParser::IOUtils::InvalidRead)
|
112
131
|
end
|
132
|
+
|
133
|
+
describe '#as_json' do
|
134
|
+
it 'converts all hash keys to string when stringify_keys: true' do
|
135
|
+
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
136
|
+
result = subject.call(File.open(fpath, 'rb')).as_json(stringify_keys: true)
|
137
|
+
|
138
|
+
expect(
|
139
|
+
result['intrinsics'].keys.map(&:class).uniq
|
140
|
+
).to eq([String])
|
141
|
+
|
142
|
+
expect(
|
143
|
+
result['intrinsics']['id3tags'].map(&:class).uniq
|
144
|
+
).to eq([ID3Tag::Tag])
|
145
|
+
end
|
146
|
+
|
147
|
+
it 'does not convert the hash keys to string when stringify_keys: false' do
|
148
|
+
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
149
|
+
result = subject.call(File.open(fpath, 'rb')).as_json
|
150
|
+
|
151
|
+
expect(
|
152
|
+
result['intrinsics'].keys.map(&:class).uniq
|
153
|
+
).to eq([Symbol])
|
154
|
+
|
155
|
+
expect(
|
156
|
+
result['intrinsics'][:id3tags].map(&:class).uniq
|
157
|
+
).to eq([ID3Tag::Tag])
|
158
|
+
end
|
159
|
+
end
|
113
160
|
end
|
@@ -12,6 +12,27 @@ describe FormatParser::MPEGParser do
|
|
12
12
|
expect(parse_result.intrinsics[:frame_rate]).to eq('30')
|
13
13
|
end
|
14
14
|
|
15
|
+
it 'returns a nil if it is necessary to iterate over a very large number of bytes and the requisite sequences are not detected' do
|
16
|
+
bytes_buffer = StringIO.new
|
17
|
+
bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
|
18
|
+
zero_bytes = [0x00].pack('C') * (1024 * 1024 * 5)
|
19
|
+
bytes_buffer.write(zero_bytes)
|
20
|
+
|
21
|
+
bytes_buffer.rewind
|
22
|
+
|
23
|
+
parse_result = described_class.call(bytes_buffer)
|
24
|
+
expect(parse_result).to be_nil
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'returns a nil if the IO only contains the MPEG header bytes at the start and nothing else' do
|
28
|
+
bytes_buffer = StringIO.new
|
29
|
+
bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
|
30
|
+
bytes_buffer.rewind
|
31
|
+
|
32
|
+
parse_result = described_class.call(bytes_buffer)
|
33
|
+
expect(parse_result).to be_nil
|
34
|
+
end
|
35
|
+
|
15
36
|
it 'parses a file with mpeg extension' do
|
16
37
|
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video2.mpeg', 'rb'))
|
17
38
|
|
@@ -103,4 +103,11 @@ describe FormatParser::ZIPParser do
|
|
103
103
|
expect(first_entry.filename).to eq('Li��nia Extreme//')
|
104
104
|
expect(first_entry.type).to eq(:directory)
|
105
105
|
end
|
106
|
+
|
107
|
+
it 'is able to handle files with invalid central directory position' do
|
108
|
+
invalid_zip_path = fixtures_dir + '/ZIP/invalid_central_directory.zip'
|
109
|
+
|
110
|
+
expect { subject.call(File.open(invalid_zip_path, 'rb')) }
|
111
|
+
.to_not raise_error
|
112
|
+
end
|
106
113
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -8,7 +8,6 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
8
|
|
9
9
|
require 'rspec'
|
10
10
|
require 'format_parser'
|
11
|
-
require 'pry'
|
12
11
|
|
13
12
|
module SpecHelpers
|
14
13
|
def fixtures_dir
|
@@ -19,6 +18,8 @@ end
|
|
19
18
|
RSpec.configure do |c|
|
20
19
|
c.include SpecHelpers
|
21
20
|
c.extend SpecHelpers # makes fixtures_dir available for example groups too
|
21
|
+
# https://relishapp.com/rspec/rspec-core/docs/command-line/only-failures
|
22
|
+
c.example_status_persistence_file_path = 'spec/examples.txt'
|
22
23
|
end
|
23
24
|
|
24
25
|
RSpec.shared_examples 'an IO object compatible with IOConstraint' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.23.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2020-
|
12
|
+
date: 2020-09-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
version: '1'
|
35
35
|
- - ">="
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 1.3.
|
37
|
+
version: 1.3.7
|
38
38
|
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,27 +44,21 @@ dependencies:
|
|
44
44
|
version: '1'
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.3.
|
47
|
+
version: 1.3.7
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: id3tag
|
50
50
|
requirement: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.
|
55
|
-
- - ">="
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
version: 0.10.1
|
54
|
+
version: '0.13'
|
58
55
|
type: :runtime
|
59
56
|
prerelease: false
|
60
57
|
version_requirements: !ruby/object:Gem::Requirement
|
61
58
|
requirements:
|
62
59
|
- - "~>"
|
63
60
|
- !ruby/object:Gem::Version
|
64
|
-
version: '0.
|
65
|
-
- - ">="
|
66
|
-
- !ruby/object:Gem::Version
|
67
|
-
version: 0.10.1
|
61
|
+
version: '0.13'
|
68
62
|
- !ruby/object:Gem::Dependency
|
69
63
|
name: faraday
|
70
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -135,20 +129,6 @@ dependencies:
|
|
135
129
|
- - "~>"
|
136
130
|
- !ruby/object:Gem::Version
|
137
131
|
version: '0.15'
|
138
|
-
- !ruby/object:Gem::Dependency
|
139
|
-
name: pry
|
140
|
-
requirement: !ruby/object:Gem::Requirement
|
141
|
-
requirements:
|
142
|
-
- - "~>"
|
143
|
-
- !ruby/object:Gem::Version
|
144
|
-
version: '0.11'
|
145
|
-
type: :development
|
146
|
-
prerelease: false
|
147
|
-
version_requirements: !ruby/object:Gem::Requirement
|
148
|
-
requirements:
|
149
|
-
- - "~>"
|
150
|
-
- !ruby/object:Gem::Version
|
151
|
-
version: '0.11'
|
152
132
|
- !ruby/object:Gem::Dependency
|
153
133
|
name: yard
|
154
134
|
requirement: !ruby/object:Gem::Requirement
|
@@ -216,6 +196,8 @@ files:
|
|
216
196
|
- Rakefile
|
217
197
|
- exe/format_parser_inspect
|
218
198
|
- format_parser.gemspec
|
199
|
+
- lib/active_storage/blob_analyzer.rb
|
200
|
+
- lib/active_storage/blob_io.rb
|
219
201
|
- lib/archive.rb
|
220
202
|
- lib/attributes_json.rb
|
221
203
|
- lib/audio.rb
|
@@ -223,6 +205,7 @@ files:
|
|
223
205
|
- lib/document.rb
|
224
206
|
- lib/format_parser.rb
|
225
207
|
- lib/format_parser/version.rb
|
208
|
+
- lib/hash_utils.rb
|
226
209
|
- lib/image.rb
|
227
210
|
- lib/io_constraint.rb
|
228
211
|
- lib/io_utils.rb
|
@@ -254,12 +237,16 @@ files:
|
|
254
237
|
- lib/read_limits_config.rb
|
255
238
|
- lib/remote_io.rb
|
256
239
|
- lib/video.rb
|
240
|
+
- spec/active_storage/blob_io_spec.rb
|
241
|
+
- spec/active_storage/rails_app_spec.rb
|
257
242
|
- spec/attributes_json_spec.rb
|
258
243
|
- spec/care_spec.rb
|
259
244
|
- spec/esoteric_formats_spec.rb
|
260
245
|
- spec/file_information_spec.rb
|
261
246
|
- spec/format_parser_inspect_spec.rb
|
262
247
|
- spec/format_parser_spec.rb
|
248
|
+
- spec/hash_utils_spec.rb
|
249
|
+
- spec/integration/active_storage/rails_app.rb
|
263
250
|
- spec/io_utils_spec.rb
|
264
251
|
- spec/parsers/aiff_parser_spec.rb
|
265
252
|
- spec/parsers/bmp_parser_spec.rb
|