format_parser 0.21.0 → 0.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +21 -0
- data/CONTRIBUTING.md +6 -1
- data/README.md +12 -1
- data/format_parser.gemspec +2 -3
- data/lib/active_storage/blob_analyzer.rb +35 -0
- data/lib/active_storage/blob_io.rb +51 -0
- data/lib/attributes_json.rb +9 -1
- data/lib/format_parser.rb +2 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/hash_utils.rb +19 -0
- data/lib/parsers/mp3_parser.rb +11 -6
- data/lib/parsers/mpeg_parser.rb +13 -17
- data/lib/parsers/zip_parser/file_reader.rb +3 -0
- data/spec/active_storage/blob_io_spec.rb +36 -0
- data/spec/active_storage/rails_app_spec.rb +58 -0
- data/spec/attributes_json_spec.rb +26 -0
- data/spec/hash_utils_spec.rb +42 -0
- data/spec/integration/active_storage/rails_app.rb +72 -0
- data/spec/parsers/mp3_parser_spec.rb +47 -0
- data/spec/parsers/mpeg_parser_spec.rb +21 -0
- data/spec/parsers/zip_parser_spec.rb +7 -0
- data/spec/spec_helper.rb +2 -1
- metadata +13 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f76db376646606abe6a7ccaa6f0a14efdc997ecd7fe29aff0ab3d8172857649f
|
4
|
+
data.tar.gz: 07c142e7ce6aaa518285d425eb95961d18e053a6b15456d4cb569cdc70a79069
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e92625dbe822d3423a084174fd7bc9d4a296effe26182a91743b36995f8470edc4fa56a3a0a10ce095d4998da5dee9620b983c1f980e0418a3b4574244e848c
|
7
|
+
data.tar.gz: b89df24f7b6638b3cd42b284a1792475a094e947bd995e5ccdd0c554014075f5d167a7c04fbc08c681483b0c82aa2e59ae4e4af62f99d6b158fb4b3f1096d80b
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,24 @@
|
|
1
|
+
## 0.23.1
|
2
|
+
* Updated gem exifr to fix problems related to JPEG files from Olympus microscopes, which often have bad thumbnail data
|
3
|
+
|
4
|
+
## 0.23.0
|
5
|
+
* Add ActiveStorage analyzer which can analyze ActiveStorage blobs. Enable it by setting
|
6
|
+
`config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer`
|
7
|
+
* Ignore empty ID3 tags and do not allow them to overwrite others
|
8
|
+
* Update the id3tag dependency so that we can fall back to UTF-8 instead of raising an error when parsing
|
9
|
+
MP3 files
|
10
|
+
|
11
|
+
## 0.22.1
|
12
|
+
* Fix ZIP parser to not raise an error for invalid ZIP files with an invalid central directory
|
13
|
+
|
14
|
+
## 0.22.0
|
15
|
+
* Adds option `stringify_keys: true` to #as_json methods (fix #151)
|
16
|
+
|
17
|
+
## 0.21.1
|
18
|
+
* MPEG: Ensure parsing does not inadvertently return an Integer instead of Result|nil
|
19
|
+
* MPEG: Scan further into the MPEG file than previously (scan 32 1KB chunks)
|
20
|
+
* MPEG: Ensure the parser does not raise an exception when there is no data to read for scanning beyond the initial header
|
21
|
+
|
1
22
|
## 0.21.0
|
2
23
|
* Adds support for MPEG video files
|
3
24
|
|
data/CONTRIBUTING.md
CHANGED
@@ -234,4 +234,9 @@ This provision also applies to the test files you include with the changed code
|
|
234
234
|
|
235
235
|
## Changelog
|
236
236
|
|
237
|
-
When creating a new release you must add an entry in the `CHANGELOG.md`.
|
237
|
+
When creating a new release you must add an entry in the `CHANGELOG.md`.
|
238
|
+
|
239
|
+
## Testing locally
|
240
|
+
|
241
|
+
It's possible to run `exe/format_parser_inspect FILE_NAME` or `exe/format_parser_inspect FILE_URI`
|
242
|
+
to test the new code without the necessity of installing the gem.
|
data/README.md
CHANGED
@@ -75,6 +75,17 @@ img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
|
75
75
|
JSON.pretty_generate(img_info) #=> ...
|
76
76
|
```
|
77
77
|
|
78
|
+
To convert the result to a Hash or a structure suitable for JSON serialization
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
82
|
+
img_info.as_json
|
83
|
+
|
84
|
+
# it's also possible to convert all keys to string
|
85
|
+
img_info.as_json(stringify_keys: true)
|
86
|
+
```
|
87
|
+
|
88
|
+
|
78
89
|
## Creating your own parsers
|
79
90
|
|
80
91
|
See the [section on writing parsers in CONTRIBUTING.md](CONTRIBUTING.md#so-you-want-to-contribute-a-new-parser)
|
@@ -188,7 +199,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
188
199
|
|
189
200
|
## Copyright
|
190
201
|
|
191
|
-
Copyright (c)
|
202
|
+
Copyright (c) 2020 WeTransfer.
|
192
203
|
|
193
204
|
`format_parser` is distributed under the conditions of the [Hippocratic License](https://firstdonoharm.dev/version/1/2/license.html)
|
194
205
|
- See LICENSE.txt for further details.
|
data/format_parser.gemspec
CHANGED
@@ -31,15 +31,14 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
33
|
spec.add_dependency 'ks', '~> 0.0'
|
34
|
-
spec.add_dependency 'exifr', '~> 1', '>= 1.3.
|
35
|
-
spec.add_dependency 'id3tag', '~> 0.
|
34
|
+
spec.add_dependency 'exifr', '~> 1', '>= 1.3.7'
|
35
|
+
spec.add_dependency 'id3tag', '~> 0.13'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
37
|
spec.add_dependency 'measurometer', '~> 1'
|
38
38
|
|
39
39
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
40
40
|
spec.add_development_dependency 'rake', '~> 12'
|
41
41
|
spec.add_development_dependency 'simplecov', '~> 0.15'
|
42
|
-
spec.add_development_dependency 'pry', '~> 0.11'
|
43
42
|
spec.add_development_dependency 'yard', '~> 0.9'
|
44
43
|
spec.add_development_dependency 'wetransfer_style', '0.5.0'
|
45
44
|
spec.add_development_dependency 'parallel_tests'
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative 'blob_io'
|
2
|
+
|
3
|
+
# An analyzer class that can be hooked to ActiveStorage, in order to enable
|
4
|
+
# FormatParser to do the blob analysis instead of ActiveStorage builtin-analyzers.
|
5
|
+
# Invoked if properly integrated in Rails initializer.
|
6
|
+
|
7
|
+
module FormatParser
|
8
|
+
module ActiveStorage
|
9
|
+
class BlobAnalyzer
|
10
|
+
# Format parser is able to handle a lot of format so by default it will accept all files
|
11
|
+
#
|
12
|
+
# @return [Boolean, true] always return true
|
13
|
+
def self.accept?(_blob)
|
14
|
+
true
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(blob)
|
18
|
+
@blob = blob
|
19
|
+
end
|
20
|
+
|
21
|
+
# @return [Hash] file metadata
|
22
|
+
def metadata
|
23
|
+
io = BlobIO.new(@blob)
|
24
|
+
parsed_file = FormatParser.parse(io)
|
25
|
+
|
26
|
+
if parsed_file
|
27
|
+
# We symbolize keys because of existing output hash format of ImageAnalyzer
|
28
|
+
parsed_file.as_json.symbolize_keys
|
29
|
+
else
|
30
|
+
logger.info "Skipping file analysis because FormatParser doesn't support the file"
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Acts as a proxy to turn ActiveStorage file into IO object
|
2
|
+
|
3
|
+
module FormatParser
|
4
|
+
module ActiveStorage
|
5
|
+
class BlobIO
|
6
|
+
# @param blob[ActiveStorage::Blob] the file with linked service
|
7
|
+
# @return [BlobIO]
|
8
|
+
def initialize(blob)
|
9
|
+
@blob = blob
|
10
|
+
@service = blob.service
|
11
|
+
@pos = 0
|
12
|
+
end
|
13
|
+
|
14
|
+
# Emulates IO#read, but requires the number of bytes to read.
|
15
|
+
# Rely on `ActiveStorage::Service.download_chunk` of each hosting type (local, S3, Azure, etc)
|
16
|
+
#
|
17
|
+
# @param n_bytes[Integer] how many bytes to read
|
18
|
+
# @return [String] the read bytes
|
19
|
+
def read(n_bytes)
|
20
|
+
# HTTP byte ranges are inclusive on both ends, hence the `- 1` on the upper bound.
|
21
|
+
http_range = (@pos..(@pos + n_bytes - 1))
|
22
|
+
body = @service.download_chunk(@blob.key, http_range)
|
23
|
+
@pos += body.bytesize
|
24
|
+
body.force_encoding(Encoding::ASCII_8BIT)
|
25
|
+
end
|
26
|
+
|
27
|
+
# Emulates IO#seek
|
28
|
+
#
|
29
|
+
# @param [Integer] offset size
|
30
|
+
# @return [Integer] always return 0, `seek` only mutates `pos` attribute
|
31
|
+
def seek(offset)
|
32
|
+
@pos = offset
|
33
|
+
0
|
34
|
+
end
|
35
|
+
|
36
|
+
# Emulates IO#size.
|
37
|
+
#
|
38
|
+
# @return [Integer] the size of the blob size from ActiveStorage
|
39
|
+
def size
|
40
|
+
@blob.byte_size
|
41
|
+
end
|
42
|
+
|
43
|
+
# Emulates IO#pos
|
44
|
+
#
|
45
|
+
# @return [Integer] the current offset (in bytes) of the io
|
46
|
+
def pos
|
47
|
+
@pos
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
data/lib/attributes_json.rb
CHANGED
@@ -15,7 +15,12 @@ module FormatParser::AttributesJSON
|
|
15
15
|
|
16
16
|
# Implements a sane default `as_json` for an object
|
17
17
|
# that accessors defined
|
18
|
-
|
18
|
+
#
|
19
|
+
# @param root[Bool] if true, it surrounds the result in a hash with a key
|
20
|
+
# `format_parser_file_info`
|
21
|
+
# @param stringify_keys[Bool] if true, it transforms all the hash keys to a string.
|
22
|
+
# The default value is false for backward compatibility
|
23
|
+
def as_json(root: false, stringify_keys: false, **)
|
19
24
|
h = {}
|
20
25
|
h['nature'] = nature if respond_to?(:nature) # Needed for file info structs
|
21
26
|
methods.grep(/\w\=$/).each_with_object(h) do |attr_writer_method_name, h|
|
@@ -27,6 +32,9 @@ module FormatParser::AttributesJSON
|
|
27
32
|
sanitized_value = _sanitize_json_value(unwrapped_attribute_value)
|
28
33
|
h[reader_method_name] = sanitized_value
|
29
34
|
end
|
35
|
+
|
36
|
+
h = FormatParser::HashUtils.deep_transform_keys(h, &:to_s) if stringify_keys
|
37
|
+
|
30
38
|
if root
|
31
39
|
{'format_parser_file_info' => h}
|
32
40
|
else
|
data/lib/format_parser.rb
CHANGED
@@ -5,6 +5,7 @@ require 'measurometer'
|
|
5
5
|
# top-level methods of the library.
|
6
6
|
module FormatParser
|
7
7
|
require_relative 'format_parser/version'
|
8
|
+
require_relative 'hash_utils'
|
8
9
|
require_relative 'attributes_json'
|
9
10
|
require_relative 'image'
|
10
11
|
require_relative 'audio'
|
@@ -17,6 +18,7 @@ module FormatParser
|
|
17
18
|
require_relative 'remote_io'
|
18
19
|
require_relative 'io_constraint'
|
19
20
|
require_relative 'care'
|
21
|
+
require_relative 'active_storage/blob_analyzer'
|
20
22
|
|
21
23
|
# Define Measurometer in the internal namespace as well
|
22
24
|
# so that we stay compatible for the applications that use it
|
data/lib/hash_utils.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# based on https://github.com/rails/rails/blob/master/activesupport/lib/active_support/core_ext/hash/keys.rb#L116
|
2
|
+
# I chose to copy this method instead of adding activesupport as a dependency
|
3
|
+
# because we want to have the least number of dependencies
|
4
|
+
module FormatParser
|
5
|
+
class HashUtils
|
6
|
+
def self.deep_transform_keys(object, &block)
|
7
|
+
case object
|
8
|
+
when Hash
|
9
|
+
object.each_with_object({}) do |(key, value), result|
|
10
|
+
result[yield(key)] = deep_transform_keys(value, &block)
|
11
|
+
end
|
12
|
+
when Array
|
13
|
+
object.map { |e| deep_transform_keys(e, &block) }
|
14
|
+
else
|
15
|
+
object
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -44,13 +44,9 @@ class FormatParser::MP3Parser
|
|
44
44
|
tag = __getobj__
|
45
45
|
MEMBERS.each_with_object({}) do |k, h|
|
46
46
|
value = tag.public_send(k)
|
47
|
-
h[k] = value if value
|
47
|
+
h[k] = value if value && !value.empty?
|
48
48
|
end
|
49
49
|
end
|
50
|
-
|
51
|
-
def as_json(*)
|
52
|
-
to_h
|
53
|
-
end
|
54
50
|
end
|
55
51
|
|
56
52
|
def likely_match?(filename)
|
@@ -85,7 +81,7 @@ class FormatParser::MP3Parser
|
|
85
81
|
|
86
82
|
first_frame = initial_frames.first
|
87
83
|
|
88
|
-
id3tags_hash = blend_id3_tags_into_hash(*tags)
|
84
|
+
id3tags_hash = with_id3tag_local_configs { blend_id3_tags_into_hash(*tags) }
|
89
85
|
|
90
86
|
file_info = FormatParser::Audio.new(
|
91
87
|
format: :mp3,
|
@@ -297,5 +293,14 @@ class FormatParser::MP3Parser
|
|
297
293
|
attrs
|
298
294
|
end
|
299
295
|
|
296
|
+
def with_id3tag_local_configs
|
297
|
+
ID3Tag.local_configuration do |c|
|
298
|
+
c.string_encode_options = { invalid: :replace, undef: :replace }
|
299
|
+
c.source_encoding_fallback = Encoding::UTF_8
|
300
|
+
|
301
|
+
yield
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
300
305
|
FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 99
|
301
306
|
end
|
data/lib/parsers/mpeg_parser.rb
CHANGED
@@ -26,9 +26,8 @@ class FormatParser::MPEGParser
|
|
26
26
|
|
27
27
|
PACK_HEADER_START_CODE = [0x00, 0x00, 0x01, 0xBA].pack('C*')
|
28
28
|
SEQUENCE_HEADER_START_CODE = [0xB3].pack('C*')
|
29
|
-
|
30
|
-
|
31
|
-
BYTES_TO_READ_PER_TIME = 1024
|
29
|
+
MAX_BLOCK_READS = 32
|
30
|
+
BYTES_TO_READ_PER_READ = 1024
|
32
31
|
|
33
32
|
def self.likely_match?(filename)
|
34
33
|
filename =~ /\.(mpg|mpeg)$/i
|
@@ -37,18 +36,19 @@ class FormatParser::MPEGParser
|
|
37
36
|
def self.call(io)
|
38
37
|
return unless matches_mpeg_header?(io)
|
39
38
|
|
40
|
-
# We are looping though the stream because there can be several sequence headers and some of them are not
|
41
|
-
# If we detect that the header is not
|
39
|
+
# We are looping through the stream because there can be several sequence headers and some of them are not useful.
|
40
|
+
# If we detect that the header is not useful, then we look for the next one, reading at most MAX_BLOCK_READS blocks.
|
42
41
|
# If we reach the EOF, then the mpg is likely to be corrupted and we return nil
|
43
|
-
|
44
|
-
|
42
|
+
MAX_BLOCK_READS.times do
|
43
|
+
next unless pos = find_next_header_code_pos(io)
|
44
|
+
io.seek(pos + 1)
|
45
45
|
horizontal_size, vertical_size = parse_image_size(io)
|
46
46
|
ratio_code, rate_code = parse_rate_information(io)
|
47
|
-
|
48
47
|
if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
|
49
48
|
return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
|
50
49
|
end
|
51
50
|
end
|
51
|
+
nil # otherwise the return value of Integer#times will be returned
|
52
52
|
rescue FormatParser::IOUtils::InvalidRead
|
53
53
|
nil
|
54
54
|
end
|
@@ -90,15 +90,11 @@ class FormatParser::MPEGParser
|
|
90
90
|
# Returns the position of the next sequence package content in the stream
|
91
91
|
# This method will read BYTES_TO_READ_PER_TIME in each loop for a maximum amount of SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT times
|
92
92
|
# If the package is not found, then it returns nil.
|
93
|
-
def self.
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
new_io_pos = io.pos - BYTES_TO_READ_PER_TIME + header_relative_index + 1
|
99
|
-
io.seek(new_io_pos)
|
100
|
-
return new_io_pos
|
101
|
-
end
|
93
|
+
def self.find_next_header_code_pos(io)
|
94
|
+
pos_before_read = io.pos
|
95
|
+
bin_str = io.read(BYTES_TO_READ_PER_READ) # bin_str might be nil if we are at EOF
|
96
|
+
header_relative_index = bin_str && bin_str.index(SEQUENCE_HEADER_START_CODE)
|
97
|
+
return pos_before_read + header_relative_index if header_relative_index
|
102
98
|
end
|
103
99
|
|
104
100
|
# If the first 4 bytes of the stream are equal to 00 00 01 BA, the pack start code for the Pack Header, then it's an MPEG file.
|
@@ -18,6 +18,7 @@ class FormatParser::ZIPParser::FileReader
|
|
18
18
|
'Could not find the EOCD signature in the buffer - maybe a malformed ZIP file'
|
19
19
|
end
|
20
20
|
end
|
21
|
+
InvalidCentralDirectory = Class.new(Error)
|
21
22
|
|
22
23
|
C_UINT32LE = 'V'
|
23
24
|
C_UINT16LE = 'v'
|
@@ -175,6 +176,8 @@ class FormatParser::ZIPParser::FileReader
|
|
175
176
|
# BUT! in format_parser we avoid unbounded reads, as a matter of fact they are forbidden.
|
176
177
|
# So we will again limit ouselves to cdir_size, and we will take cushion of 1 KB.
|
177
178
|
central_directory_str = io.read(cdir_size + 1024)
|
179
|
+
raise InvalidCentralDirectory if central_directory_str.nil?
|
180
|
+
|
178
181
|
central_directory_io = StringIO.new(central_directory_str)
|
179
182
|
log do
|
180
183
|
format(
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::ActiveStorage::BlobIO do
|
4
|
+
let(:blob_service) { double }
|
5
|
+
let(:blob) { double(key: 'blob_key', service: blob_service, byte_size: 43000) }
|
6
|
+
let(:io) { described_class.new(blob) }
|
7
|
+
let(:fixture_path) { fixtures_dir + '/test.png' }
|
8
|
+
|
9
|
+
it_behaves_like 'an IO object compatible with IOConstraint'
|
10
|
+
|
11
|
+
describe '#read' do
|
12
|
+
it 'reads io using download_chunk from ActiveStorage#Service' do
|
13
|
+
allow(blob_service).to receive(:download_chunk) { 'a' }
|
14
|
+
|
15
|
+
expect(io.read(1)).to eq('a')
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'updates #pos on read' do
|
19
|
+
allow(blob_service).to receive(:download_chunk) { 'a' }
|
20
|
+
|
21
|
+
expect { io.read(1) }.to change { io.pos }.from(0).to(1)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe '#seek' do
|
26
|
+
it 'updates @pos' do
|
27
|
+
expect { io.seek(10) }.to change { io.pos }.from(0).to(10)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe '#size' do
|
32
|
+
it 'returns the size of the blob byte_size' do
|
33
|
+
expect(io.size).to eq(blob.byte_size)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
def skip_reason
|
4
|
+
if RUBY_ENGINE == 'jruby'
|
5
|
+
'Skipping because JRuby have randon failing issue'
|
6
|
+
elsif RUBY_VERSION.to_f < 2.5
|
7
|
+
'Skipping because Rails testing script use Rails 6, who does not support Ruby bellow 2.5'
|
8
|
+
else
|
9
|
+
false
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe 'Rails app with ActiveStorage and format-parser', skip: skip_reason do
|
14
|
+
describe 'local hosting with ActiveStorage disk adapter' do
|
15
|
+
it 'parse local file with format_parser' do
|
16
|
+
clean_env do
|
17
|
+
cmd = 'ruby spec/integration/active_storage/rails_app.rb'
|
18
|
+
cmd_status = ruby_script_runner(cmd)
|
19
|
+
expect(cmd_status[:stdout].last).to match(/1 runs, 3 assertions, 0 failures, 0 errors, 0 skips/)
|
20
|
+
expect(cmd_status[:exitstatus]).to eq(0)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def ruby_script_runner(cmd)
|
26
|
+
require 'open3'
|
27
|
+
cmd_status = { stdout: [], exitstatus: nil }
|
28
|
+
Open3.popen2(cmd) do |_stdin, stdout, wait_thr|
|
29
|
+
frame_stdout do
|
30
|
+
while line = stdout.gets
|
31
|
+
puts "| #{line}"
|
32
|
+
cmd_status[:stdout] << line
|
33
|
+
end
|
34
|
+
end
|
35
|
+
cmd_status[:exitstatus] = wait_thr.value.exitstatus
|
36
|
+
end
|
37
|
+
cmd_status
|
38
|
+
end
|
39
|
+
|
40
|
+
def frame_stdout
|
41
|
+
puts
|
42
|
+
puts '-' * 50
|
43
|
+
yield
|
44
|
+
puts '-' * 50
|
45
|
+
end
|
46
|
+
|
47
|
+
def clean_env
|
48
|
+
if Bundler.respond_to?(:with_unbundled_env)
|
49
|
+
Bundler.with_unbundled_env do
|
50
|
+
yield
|
51
|
+
end
|
52
|
+
else
|
53
|
+
Bundler.with_clean_env do
|
54
|
+
yield
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -140,4 +140,30 @@ describe FormatParser::AttributesJSON do
|
|
140
140
|
JSON.pretty_generate(object_with_attributes_module)
|
141
141
|
}.to raise_error(/structure too deep/)
|
142
142
|
end
|
143
|
+
|
144
|
+
it 'converts all hash keys to string when stringify_keys: true' do
|
145
|
+
fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
|
146
|
+
fi_io = File.open(fixture_path, 'rb')
|
147
|
+
|
148
|
+
result = FormatParser::ZIPParser.new.call(fi_io).as_json(stringify_keys: true)
|
149
|
+
|
150
|
+
result['entries'].each do |entry|
|
151
|
+
entry.each do |key, _value|
|
152
|
+
expect(key).to be_a(String)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
it 'does not convert hash keys to string when stringify_keys: false' do
|
158
|
+
fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
|
159
|
+
fi_io = File.open(fixture_path, 'rb')
|
160
|
+
|
161
|
+
result = FormatParser::ZIPParser.new.call(fi_io).as_json
|
162
|
+
|
163
|
+
result['entries'].each do |entry|
|
164
|
+
entry.each do |key, _value|
|
165
|
+
expect(key).to be_a(Symbol)
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
143
169
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::HashUtils do
|
4
|
+
describe '.deep_transform_keys' do
|
5
|
+
it 'transforms all the keys in a hash' do
|
6
|
+
hash = { aa: 1, 'bb' => 2 }
|
7
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
8
|
+
|
9
|
+
expect(result).to eq('aa' => 1, 'bb' => 2)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'transforms all the keys in a array of hashes' do
|
13
|
+
array = [{ aa: 1, bb: 2 }, { cc: 3, dd: [{c: 2, d: 3}] }]
|
14
|
+
result = described_class.deep_transform_keys(array, &:to_s)
|
15
|
+
|
16
|
+
expect(result).to eq(
|
17
|
+
[{'aa' => 1, 'bb' => 2}, {'cc' => 3, 'dd' => [{'c' => 2, 'd' => 3}]}]
|
18
|
+
)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'transforms all the keys in a hash recursively' do
|
22
|
+
hash = { aa: 1, bb: { cc: 22, dd: 3 } }
|
23
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
24
|
+
|
25
|
+
expect(result).to eq('aa' => 1, 'bb' => { 'cc' => 22, 'dd' => 3})
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'does nothing for an non array/hash object' do
|
29
|
+
object = Object.new
|
30
|
+
result = described_class.deep_transform_keys(object, &:to_s)
|
31
|
+
|
32
|
+
expect(result).to eq(object)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'returns the last value if different keys are transformed into the same one' do
|
36
|
+
hash = { aa: 0, 'bb' => 2, bb: 1 }
|
37
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
38
|
+
|
39
|
+
expect(result).to eq('aa' => 0, 'bb' => 1)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'bundler/inline'
|
2
|
+
|
3
|
+
gemfile(true) do
|
4
|
+
source 'https://rubygems.org'
|
5
|
+
|
6
|
+
git_source(:github) { |repo| "https://github.com/#{repo}.git" }
|
7
|
+
|
8
|
+
gem 'rails', '6.0.3'
|
9
|
+
gem 'sqlite3'
|
10
|
+
gem 'format_parser', path: './'
|
11
|
+
end
|
12
|
+
|
13
|
+
require 'active_record/railtie'
|
14
|
+
require 'active_storage/engine'
|
15
|
+
require 'tmpdir'
|
16
|
+
|
17
|
+
class TestApp < Rails::Application
|
18
|
+
config.root = __dir__
|
19
|
+
config.hosts << 'example.org'
|
20
|
+
config.eager_load = false
|
21
|
+
config.session_store :cookie_store, key: 'cookie_store_key'
|
22
|
+
secrets.secret_key_base = 'secret_key_base'
|
23
|
+
|
24
|
+
config.logger = Logger.new('/dev/null')
|
25
|
+
|
26
|
+
config.active_storage.service = :local
|
27
|
+
config.active_storage.service_configurations = {
|
28
|
+
local: {
|
29
|
+
root: Dir.tmpdir,
|
30
|
+
service: 'Disk'
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer
|
35
|
+
end
|
36
|
+
|
37
|
+
ENV['DATABASE_URL'] = 'sqlite3::memory:'
|
38
|
+
|
39
|
+
Rails.application.initialize!
|
40
|
+
|
41
|
+
require ActiveStorage::Engine.root.join('db/migrate/20170806125915_create_active_storage_tables.rb').to_s
|
42
|
+
|
43
|
+
ActiveRecord::Schema.define do
|
44
|
+
CreateActiveStorageTables.new.change
|
45
|
+
|
46
|
+
create_table :users, force: true
|
47
|
+
end
|
48
|
+
|
49
|
+
class User < ActiveRecord::Base
|
50
|
+
has_one_attached :profile_picture
|
51
|
+
end
|
52
|
+
|
53
|
+
require 'minitest/autorun'
|
54
|
+
require 'open-uri'
|
55
|
+
|
56
|
+
describe User do
|
57
|
+
describe "profile_picture's metadatas" do
|
58
|
+
it 'parse metadatas with format_parser' do
|
59
|
+
user = User.create
|
60
|
+
user.profile_picture.attach(
|
61
|
+
filename: 'cat.png',
|
62
|
+
io: URI.open('https://freesvg.org/img/1416155153.png')
|
63
|
+
)
|
64
|
+
|
65
|
+
user.profile_picture.analyze
|
66
|
+
|
67
|
+
_(user.profile_picture.metadata[:width_px]).must_equal 500
|
68
|
+
_(user.profile_picture.metadata[:height_px]).must_equal 296
|
69
|
+
_(user.profile_picture.metadata[:color_mode]).must_equal 'rgba'
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -37,6 +37,14 @@ describe FormatParser::MP3Parser do
|
|
37
37
|
expect(parsed.album).to be_nil
|
38
38
|
end
|
39
39
|
end
|
40
|
+
|
41
|
+
context 'when has an empty tag' do
|
42
|
+
let(:fpath) { fixtures_dir + '/MP3/id3v2_with_empty_tag.mp3' }
|
43
|
+
|
44
|
+
it 'ignores the empty tags' do
|
45
|
+
expect(parsed.intrinsics[:genre]).to eq('Rock')
|
46
|
+
end
|
47
|
+
end
|
40
48
|
end
|
41
49
|
|
42
50
|
it 'decodes and estimates duration for a CBR MP3' do
|
@@ -71,6 +79,17 @@ describe FormatParser::MP3Parser do
|
|
71
79
|
expect(prepped.pos).to eq(3145738)
|
72
80
|
end
|
73
81
|
|
82
|
+
it 'does not raise error when a tag frame has unsupported encoding' do
|
83
|
+
fpath = fixtures_dir + '/MP3/id3v2_frame_with_invalid_encoding.mp3'
|
84
|
+
|
85
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
86
|
+
|
87
|
+
expect(parsed.nature). to eq(:audio)
|
88
|
+
expect(parsed.album).to eq('wetransfer')
|
89
|
+
expect(parsed.artist).to eq('wetransfer')
|
90
|
+
expect(parsed.title).to eq('test')
|
91
|
+
end
|
92
|
+
|
74
93
|
it 'parses the Cassy MP3' do
|
75
94
|
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
76
95
|
parsed = subject.call(File.open(fpath, 'rb'))
|
@@ -110,4 +129,32 @@ describe FormatParser::MP3Parser do
|
|
110
129
|
subject.call(StringIO.new(''))
|
111
130
|
}.to raise_error(FormatParser::IOUtils::InvalidRead)
|
112
131
|
end
|
132
|
+
|
133
|
+
describe '#as_json' do
|
134
|
+
it 'converts all hash keys to string when stringify_keys: true' do
|
135
|
+
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
136
|
+
result = subject.call(File.open(fpath, 'rb')).as_json(stringify_keys: true)
|
137
|
+
|
138
|
+
expect(
|
139
|
+
result['intrinsics'].keys.map(&:class).uniq
|
140
|
+
).to eq([String])
|
141
|
+
|
142
|
+
expect(
|
143
|
+
result['intrinsics']['id3tags'].map(&:class).uniq
|
144
|
+
).to eq([ID3Tag::Tag])
|
145
|
+
end
|
146
|
+
|
147
|
+
it 'does not convert the hash keys to string when stringify_keys: false' do
|
148
|
+
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
149
|
+
result = subject.call(File.open(fpath, 'rb')).as_json
|
150
|
+
|
151
|
+
expect(
|
152
|
+
result['intrinsics'].keys.map(&:class).uniq
|
153
|
+
).to eq([Symbol])
|
154
|
+
|
155
|
+
expect(
|
156
|
+
result['intrinsics'][:id3tags].map(&:class).uniq
|
157
|
+
).to eq([ID3Tag::Tag])
|
158
|
+
end
|
159
|
+
end
|
113
160
|
end
|
@@ -12,6 +12,27 @@ describe FormatParser::MPEGParser do
|
|
12
12
|
expect(parse_result.intrinsics[:frame_rate]).to eq('30')
|
13
13
|
end
|
14
14
|
|
15
|
+
it 'returns a nil if it is necessary to iterate over a very large number of bytes and the requisite sequences are not detected' do
|
16
|
+
bytes_buffer = StringIO.new
|
17
|
+
bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
|
18
|
+
zero_bytes = [0x00].pack('C') * (1024 * 1024 * 5)
|
19
|
+
bytes_buffer.write(zero_bytes)
|
20
|
+
|
21
|
+
bytes_buffer.rewind
|
22
|
+
|
23
|
+
parse_result = described_class.call(bytes_buffer)
|
24
|
+
expect(parse_result).to be_nil
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'returns a nil if the IO only contains the MPEG header bytes at the start and nothing else' do
|
28
|
+
bytes_buffer = StringIO.new
|
29
|
+
bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
|
30
|
+
bytes_buffer.rewind
|
31
|
+
|
32
|
+
parse_result = described_class.call(bytes_buffer)
|
33
|
+
expect(parse_result).to be_nil
|
34
|
+
end
|
35
|
+
|
15
36
|
it 'parses a file with mpeg extension' do
|
16
37
|
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video2.mpeg', 'rb'))
|
17
38
|
|
@@ -103,4 +103,11 @@ describe FormatParser::ZIPParser do
|
|
103
103
|
expect(first_entry.filename).to eq('Li��nia Extreme//')
|
104
104
|
expect(first_entry.type).to eq(:directory)
|
105
105
|
end
|
106
|
+
|
107
|
+
it 'is able to handle files with invalid central directory position' do
|
108
|
+
invalid_zip_path = fixtures_dir + '/ZIP/invalid_central_directory.zip'
|
109
|
+
|
110
|
+
expect { subject.call(File.open(invalid_zip_path, 'rb')) }
|
111
|
+
.to_not raise_error
|
112
|
+
end
|
106
113
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -8,7 +8,6 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
8
|
|
9
9
|
require 'rspec'
|
10
10
|
require 'format_parser'
|
11
|
-
require 'pry'
|
12
11
|
|
13
12
|
module SpecHelpers
|
14
13
|
def fixtures_dir
|
@@ -19,6 +18,8 @@ end
|
|
19
18
|
RSpec.configure do |c|
|
20
19
|
c.include SpecHelpers
|
21
20
|
c.extend SpecHelpers # makes fixtures_dir available for example groups too
|
21
|
+
# https://relishapp.com/rspec/rspec-core/docs/command-line/only-failures
|
22
|
+
c.example_status_persistence_file_path = 'spec/examples.txt'
|
22
23
|
end
|
23
24
|
|
24
25
|
RSpec.shared_examples 'an IO object compatible with IOConstraint' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.23.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2020-
|
12
|
+
date: 2020-09-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
version: '1'
|
35
35
|
- - ">="
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 1.3.
|
37
|
+
version: 1.3.7
|
38
38
|
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,27 +44,21 @@ dependencies:
|
|
44
44
|
version: '1'
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.3.
|
47
|
+
version: 1.3.7
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: id3tag
|
50
50
|
requirement: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.
|
55
|
-
- - ">="
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
version: 0.10.1
|
54
|
+
version: '0.13'
|
58
55
|
type: :runtime
|
59
56
|
prerelease: false
|
60
57
|
version_requirements: !ruby/object:Gem::Requirement
|
61
58
|
requirements:
|
62
59
|
- - "~>"
|
63
60
|
- !ruby/object:Gem::Version
|
64
|
-
version: '0.
|
65
|
-
- - ">="
|
66
|
-
- !ruby/object:Gem::Version
|
67
|
-
version: 0.10.1
|
61
|
+
version: '0.13'
|
68
62
|
- !ruby/object:Gem::Dependency
|
69
63
|
name: faraday
|
70
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -135,20 +129,6 @@ dependencies:
|
|
135
129
|
- - "~>"
|
136
130
|
- !ruby/object:Gem::Version
|
137
131
|
version: '0.15'
|
138
|
-
- !ruby/object:Gem::Dependency
|
139
|
-
name: pry
|
140
|
-
requirement: !ruby/object:Gem::Requirement
|
141
|
-
requirements:
|
142
|
-
- - "~>"
|
143
|
-
- !ruby/object:Gem::Version
|
144
|
-
version: '0.11'
|
145
|
-
type: :development
|
146
|
-
prerelease: false
|
147
|
-
version_requirements: !ruby/object:Gem::Requirement
|
148
|
-
requirements:
|
149
|
-
- - "~>"
|
150
|
-
- !ruby/object:Gem::Version
|
151
|
-
version: '0.11'
|
152
132
|
- !ruby/object:Gem::Dependency
|
153
133
|
name: yard
|
154
134
|
requirement: !ruby/object:Gem::Requirement
|
@@ -216,6 +196,8 @@ files:
|
|
216
196
|
- Rakefile
|
217
197
|
- exe/format_parser_inspect
|
218
198
|
- format_parser.gemspec
|
199
|
+
- lib/active_storage/blob_analyzer.rb
|
200
|
+
- lib/active_storage/blob_io.rb
|
219
201
|
- lib/archive.rb
|
220
202
|
- lib/attributes_json.rb
|
221
203
|
- lib/audio.rb
|
@@ -223,6 +205,7 @@ files:
|
|
223
205
|
- lib/document.rb
|
224
206
|
- lib/format_parser.rb
|
225
207
|
- lib/format_parser/version.rb
|
208
|
+
- lib/hash_utils.rb
|
226
209
|
- lib/image.rb
|
227
210
|
- lib/io_constraint.rb
|
228
211
|
- lib/io_utils.rb
|
@@ -254,12 +237,16 @@ files:
|
|
254
237
|
- lib/read_limits_config.rb
|
255
238
|
- lib/remote_io.rb
|
256
239
|
- lib/video.rb
|
240
|
+
- spec/active_storage/blob_io_spec.rb
|
241
|
+
- spec/active_storage/rails_app_spec.rb
|
257
242
|
- spec/attributes_json_spec.rb
|
258
243
|
- spec/care_spec.rb
|
259
244
|
- spec/esoteric_formats_spec.rb
|
260
245
|
- spec/file_information_spec.rb
|
261
246
|
- spec/format_parser_inspect_spec.rb
|
262
247
|
- spec/format_parser_spec.rb
|
248
|
+
- spec/hash_utils_spec.rb
|
249
|
+
- spec/integration/active_storage/rails_app.rb
|
263
250
|
- spec/io_utils_spec.rb
|
264
251
|
- spec/parsers/aiff_parser_spec.rb
|
265
252
|
- spec/parsers/bmp_parser_spec.rb
|