format_parser 0.21.1 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +20 -0
- data/CONTRIBUTING.md +6 -1
- data/README.md +12 -1
- data/format_parser.gemspec +2 -3
- data/lib/active_storage/blob_analyzer.rb +35 -0
- data/lib/active_storage/blob_io.rb +51 -0
- data/lib/attributes_json.rb +9 -1
- data/lib/format_parser.rb +2 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/hash_utils.rb +19 -0
- data/lib/parsers/mp3_parser.rb +11 -6
- data/lib/parsers/tiff_parser.rb +7 -1
- data/lib/parsers/zip_parser/file_reader.rb +3 -0
- data/spec/active_storage/blob_io_spec.rb +36 -0
- data/spec/active_storage/rails_app_spec.rb +58 -0
- data/spec/attributes_json_spec.rb +26 -0
- data/spec/hash_utils_spec.rb +42 -0
- data/spec/integration/active_storage/rails_app.rb +72 -0
- data/spec/parsers/mp3_parser_spec.rb +47 -0
- data/spec/parsers/tiff_parser_spec.rb +4 -1
- data/spec/parsers/zip_parser_spec.rb +7 -0
- data/spec/spec_helper.rb +2 -1
- metadata +14 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cef6e8b3f4c3914ef022e7ff0e92d38357670315c0804efe8a2967f7f4228f53
|
4
|
+
data.tar.gz: '09a817b4fc688a3695800f4602a5901613b591d7a30ba26b86988720da7e13a9'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 979804c2a7381d09e1f4952396321e185a917587d44409b1f7d45be0e05b466601eb7281e3d463785c6c18d79ba48ff3858bcd60c0f6791e2d394c4fa40ed684
|
7
|
+
data.tar.gz: a58cd8cab20fa8b8b7886a36479bd03fb6f5a4afbf9435d09318da502de50f6f257e45b012a56c79b4309dbaaf96feedb6b2b12016979bffa1336c528a591e3b
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,23 @@
|
|
1
|
+
## 0.24.0
|
2
|
+
* The TIFF parser will now return :arw as format for Sony ARW files instead of :tif so that the caller can decide whether it
|
3
|
+
wants to deal with RAW processing or not
|
4
|
+
|
5
|
+
## 0.23.1
|
6
|
+
* Updated gem exifr to fix problems related to JPEG files from Olympus microscopes, which often have bad thumbnail data
|
7
|
+
|
8
|
+
## 0.23.0
|
9
|
+
* Add ActiveStorage analyzer which can analyze ActiveStorage blobs. Enable it by setting
|
10
|
+
`config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer`
|
11
|
+
* Ignore empty ID3 tags and do not allow them to overwrite others
|
12
|
+
* Update the id3tag dependency so that we can fall back to UTF-8 instead of raising an error when parsing
|
13
|
+
MP3 files
|
14
|
+
|
15
|
+
## 0.22.1
|
16
|
+
* Fix ZIP parser so that it no longer raises an error for ZIP files with an invalid central directory
|
17
|
+
|
18
|
+
## 0.22.0
|
19
|
+
* Adds option `stringify_keys: true` to #as_json methods (fix #151)
|
20
|
+
|
1
21
|
## 0.21.1
|
2
22
|
* MPEG: Ensure parsing does not inadvertently return an Integer instead of Result|nil
|
3
23
|
* MPEG: Scan further into the MPEG file than previously (scan 32 1KB chunks)
|
data/CONTRIBUTING.md
CHANGED
@@ -234,4 +234,9 @@ This provision also applies to the test files you include with the changed code
|
|
234
234
|
|
235
235
|
## Changelog
|
236
236
|
|
237
|
-
When creating a new release you must add an entry in the `CHANGELOG.md`.
|
237
|
+
When creating a new release you must add an entry in the `CHANGELOG.md`.
|
238
|
+
|
239
|
+
## Testing locally
|
240
|
+
|
241
|
+
It's possible to run `exe/format_parser_inspect FILE_NAME` or `exe/format_parser_inspect FILE_URI`
|
242
|
+
to test the new code without the necessity of installing the gem.
|
data/README.md
CHANGED
@@ -75,6 +75,17 @@ img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
|
75
75
|
JSON.pretty_generate(img_info) #=> ...
|
76
76
|
```
|
77
77
|
|
78
|
+
To convert the result to a Hash or a structure suitable for JSON serialization:
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
82
|
+
img_info.as_json
|
83
|
+
|
84
|
+
# it's also possible to convert all keys to strings
|
85
|
+
img_info.as_json(stringify_keys: true)
|
86
|
+
```
|
87
|
+
|
88
|
+
|
78
89
|
## Creating your own parsers
|
79
90
|
|
80
91
|
See the [section on writing parsers in CONTRIBUTING.md](CONTRIBUTING.md#so-you-want-to-contribute-a-new-parser)
|
@@ -188,7 +199,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
188
199
|
|
189
200
|
## Copyright
|
190
201
|
|
191
|
-
Copyright (c)
|
202
|
+
Copyright (c) 2020 WeTransfer.
|
192
203
|
|
193
204
|
`format_parser` is distributed under the conditions of the [Hippocratic License](https://firstdonoharm.dev/version/1/2/license.html)
|
194
205
|
- See LICENSE.txt for further details.
|
data/format_parser.gemspec
CHANGED
@@ -31,15 +31,14 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
33
|
spec.add_dependency 'ks', '~> 0.0'
|
34
|
-
spec.add_dependency 'exifr', '~> 1', '>= 1.3.
|
35
|
-
spec.add_dependency 'id3tag', '~> 0.
|
34
|
+
spec.add_dependency 'exifr', '~> 1', '>= 1.3.7'
|
35
|
+
spec.add_dependency 'id3tag', '~> 0.13'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
37
|
spec.add_dependency 'measurometer', '~> 1'
|
38
38
|
|
39
39
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
40
40
|
spec.add_development_dependency 'rake', '~> 12'
|
41
41
|
spec.add_development_dependency 'simplecov', '~> 0.15'
|
42
|
-
spec.add_development_dependency 'pry', '~> 0.11'
|
43
42
|
spec.add_development_dependency 'yard', '~> 0.9'
|
44
43
|
spec.add_development_dependency 'wetransfer_style', '0.5.0'
|
45
44
|
spec.add_development_dependency 'parallel_tests'
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative 'blob_io'
|
2
|
+
|
3
|
+
# An analyzer class that can be hooked to ActiveStorage, in order to enable
|
4
|
+
# FormatParser to do the blob analysis instead of ActiveStorage's built-in analyzers.
|
5
|
+
# Invoked if properly integrated in Rails initializer.
|
6
|
+
|
7
|
+
module FormatParser
|
8
|
+
module ActiveStorage
|
9
|
+
class BlobAnalyzer
|
10
|
+
# Format parser is able to handle a lot of formats, so by default it will accept all files
|
11
|
+
#
|
12
|
+
# @return [Boolean, true] always return true
|
13
|
+
def self.accept?(_blob)
|
14
|
+
true
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(blob)
|
18
|
+
@blob = blob
|
19
|
+
end
|
20
|
+
|
21
|
+
# @return [Hash] file metadata
|
22
|
+
def metadata
|
23
|
+
io = BlobIO.new(@blob)
|
24
|
+
parsed_file = FormatParser.parse(io)
|
25
|
+
|
26
|
+
if parsed_file
|
27
|
+
# We symbolize keys because of existing output hash format of ImageAnalyzer
|
28
|
+
parsed_file.as_json.symbolize_keys
|
29
|
+
else
|
30
|
+
logger.info "Skipping file analysis because FormatParser doesn't support the file"
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Acts as a proxy to turn ActiveStorage file into IO object
|
2
|
+
|
3
|
+
module FormatParser
|
4
|
+
module ActiveStorage
|
5
|
+
class BlobIO
|
6
|
+
# @param blob[ActiveStorage::Blob] the file with linked service
|
7
|
+
# @return [BlobIO]
|
8
|
+
def initialize(blob)
|
9
|
+
@blob = blob
|
10
|
+
@service = blob.service
|
11
|
+
@pos = 0
|
12
|
+
end
|
13
|
+
|
14
|
+
# Emulates IO#read, but requires the number of bytes to read.
|
15
|
+
# Rely on `ActiveStorage::Service.download_chunk` of each hosting type (local, S3, Azure, etc)
|
16
|
+
#
|
17
|
+
# @param n_bytes[Integer] how many bytes to read
|
18
|
+
# @return [String] the read bytes
|
19
|
+
def read(n_bytes)
|
20
|
+
# HTTP byte ranges are inclusive on both ends, hence the -1 on the upper bound.
|
21
|
+
http_range = (@pos..(@pos + n_bytes - 1))
|
22
|
+
body = @service.download_chunk(@blob.key, http_range)
|
23
|
+
@pos += body.bytesize
|
24
|
+
body.force_encoding(Encoding::ASCII_8BIT)
|
25
|
+
end
|
26
|
+
|
27
|
+
# Emulates IO#seek
|
28
|
+
#
|
29
|
+
# @param offset[Integer] the byte offset to set the current position to
|
30
|
+
# @return [Integer] always return 0, `seek` only mutates `pos` attribute
|
31
|
+
def seek(offset)
|
32
|
+
@pos = offset
|
33
|
+
0
|
34
|
+
end
|
35
|
+
|
36
|
+
# Emulates IO#size.
|
37
|
+
#
|
38
|
+
# @return [Integer] the byte size of the blob as reported by ActiveStorage
|
39
|
+
def size
|
40
|
+
@blob.byte_size
|
41
|
+
end
|
42
|
+
|
43
|
+
# Emulates IO#pos
|
44
|
+
#
|
45
|
+
# @return [Integer] the current offset (in bytes) of the io
|
46
|
+
def pos
|
47
|
+
@pos
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
data/lib/attributes_json.rb
CHANGED
@@ -15,7 +15,12 @@ module FormatParser::AttributesJSON
|
|
15
15
|
|
16
16
|
# Implements a sane default `as_json` for an object
|
17
17
|
# that has accessors defined
|
18
|
-
|
18
|
+
#
|
19
|
+
# @param root[Bool] if true, it surrounds the result in a hash with a key
|
20
|
+
# `format_parser_file_info`
|
21
|
+
# @param stringify_keys[Bool] if true, it transforms all the hash keys to a string.
|
22
|
+
# The default value is false for backward compatibility
|
23
|
+
def as_json(root: false, stringify_keys: false, **)
|
19
24
|
h = {}
|
20
25
|
h['nature'] = nature if respond_to?(:nature) # Needed for file info structs
|
21
26
|
methods.grep(/\w\=$/).each_with_object(h) do |attr_writer_method_name, h|
|
@@ -27,6 +32,9 @@ module FormatParser::AttributesJSON
|
|
27
32
|
sanitized_value = _sanitize_json_value(unwrapped_attribute_value)
|
28
33
|
h[reader_method_name] = sanitized_value
|
29
34
|
end
|
35
|
+
|
36
|
+
h = FormatParser::HashUtils.deep_transform_keys(h, &:to_s) if stringify_keys
|
37
|
+
|
30
38
|
if root
|
31
39
|
{'format_parser_file_info' => h}
|
32
40
|
else
|
data/lib/format_parser.rb
CHANGED
@@ -5,6 +5,7 @@ require 'measurometer'
|
|
5
5
|
# top-level methods of the library.
|
6
6
|
module FormatParser
|
7
7
|
require_relative 'format_parser/version'
|
8
|
+
require_relative 'hash_utils'
|
8
9
|
require_relative 'attributes_json'
|
9
10
|
require_relative 'image'
|
10
11
|
require_relative 'audio'
|
@@ -17,6 +18,7 @@ module FormatParser
|
|
17
18
|
require_relative 'remote_io'
|
18
19
|
require_relative 'io_constraint'
|
19
20
|
require_relative 'care'
|
21
|
+
require_relative 'active_storage/blob_analyzer'
|
20
22
|
|
21
23
|
# Define Measurometer in the internal namespace as well
|
22
24
|
# so that we stay compatible for the applications that use it
|
data/lib/hash_utils.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# based on https://github.com/rails/rails/blob/master/activesupport/lib/active_support/core_ext/hash/keys.rb#L116
|
2
|
+
# I chose to copy this method instead of adding activesupport as a dependency
|
3
|
+
# because we want to have the least number of dependencies
|
4
|
+
module FormatParser
|
5
|
+
class HashUtils
|
6
|
+
def self.deep_transform_keys(object, &block)
|
7
|
+
case object
|
8
|
+
when Hash
|
9
|
+
object.each_with_object({}) do |(key, value), result|
|
10
|
+
result[yield(key)] = deep_transform_keys(value, &block)
|
11
|
+
end
|
12
|
+
when Array
|
13
|
+
object.map { |e| deep_transform_keys(e, &block) }
|
14
|
+
else
|
15
|
+
object
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -44,13 +44,9 @@ class FormatParser::MP3Parser
|
|
44
44
|
tag = __getobj__
|
45
45
|
MEMBERS.each_with_object({}) do |k, h|
|
46
46
|
value = tag.public_send(k)
|
47
|
-
h[k] = value if value
|
47
|
+
h[k] = value if value && !value.empty?
|
48
48
|
end
|
49
49
|
end
|
50
|
-
|
51
|
-
def as_json(*)
|
52
|
-
to_h
|
53
|
-
end
|
54
50
|
end
|
55
51
|
|
56
52
|
def likely_match?(filename)
|
@@ -85,7 +81,7 @@ class FormatParser::MP3Parser
|
|
85
81
|
|
86
82
|
first_frame = initial_frames.first
|
87
83
|
|
88
|
-
id3tags_hash = blend_id3_tags_into_hash(*tags)
|
84
|
+
id3tags_hash = with_id3tag_local_configs { blend_id3_tags_into_hash(*tags) }
|
89
85
|
|
90
86
|
file_info = FormatParser::Audio.new(
|
91
87
|
format: :mp3,
|
@@ -297,5 +293,14 @@ class FormatParser::MP3Parser
|
|
297
293
|
attrs
|
298
294
|
end
|
299
295
|
|
296
|
+
def with_id3tag_local_configs
|
297
|
+
ID3Tag.local_configuration do |c|
|
298
|
+
c.string_encode_options = { invalid: :replace, undef: :replace }
|
299
|
+
c.source_encoding_fallback = Encoding::UTF_8
|
300
|
+
|
301
|
+
yield
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
300
305
|
FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 99
|
301
306
|
end
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -26,7 +26,7 @@ class FormatParser::TIFFParser
|
|
26
26
|
h = exif_data.height || exif_data.pixel_y_dimension
|
27
27
|
|
28
28
|
FormatParser::Image.new(
|
29
|
-
format: :tif,
|
29
|
+
format: arw?(exif_data) ? :arw : :tif, # Specify format as arw for Sony ARW format images, else tif
|
30
30
|
width_px: w,
|
31
31
|
height_px: h,
|
32
32
|
display_width_px: exif_data.rotated? ? h : w,
|
@@ -43,5 +43,11 @@ class FormatParser::TIFFParser
|
|
43
43
|
safe_read(io, 2) == 'CR'
|
44
44
|
end
|
45
45
|
|
46
|
+
# Similar to how exiftool determines the image type as ARW, we are implementing a check here
|
47
|
+
# https://github.com/exiftool/exiftool/blob/e969456372fbaf4b980fea8bb094d71033ac8bf7/lib/Image/ExifTool/Exif.pm#L929
|
48
|
+
def arw?(exif_data)
|
49
|
+
exif_data.compression == 6 && exif_data.new_subfile_type == 1 && exif_data.make == 'SONY'
|
50
|
+
end
|
51
|
+
|
46
52
|
FormatParser.register_parser new, natures: :image, formats: :tif
|
47
53
|
end
|
@@ -18,6 +18,7 @@ class FormatParser::ZIPParser::FileReader
|
|
18
18
|
'Could not find the EOCD signature in the buffer - maybe a malformed ZIP file'
|
19
19
|
end
|
20
20
|
end
|
21
|
+
InvalidCentralDirectory = Class.new(Error)
|
21
22
|
|
22
23
|
C_UINT32LE = 'V'
|
23
24
|
C_UINT16LE = 'v'
|
@@ -175,6 +176,8 @@ class FormatParser::ZIPParser::FileReader
|
|
175
176
|
# BUT! in format_parser we avoid unbounded reads, as a matter of fact they are forbidden.
|
176
177
|
# So we will again limit ourselves to cdir_size, and we will take a cushion of 1 KB.
|
177
178
|
central_directory_str = io.read(cdir_size + 1024)
|
179
|
+
raise InvalidCentralDirectory if central_directory_str.nil?
|
180
|
+
|
178
181
|
central_directory_io = StringIO.new(central_directory_str)
|
179
182
|
log do
|
180
183
|
format(
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::ActiveStorage::BlobIO do
|
4
|
+
let(:blob_service) { double }
|
5
|
+
let(:blob) { double(key: 'blob_key', service: blob_service, byte_size: 43000) }
|
6
|
+
let(:io) { described_class.new(blob) }
|
7
|
+
let(:fixture_path) { fixtures_dir + '/test.png' }
|
8
|
+
|
9
|
+
it_behaves_like 'an IO object compatible with IOConstraint'
|
10
|
+
|
11
|
+
describe '#read' do
|
12
|
+
it 'reads io using download_chunk from ActiveStorage#Service' do
|
13
|
+
allow(blob_service).to receive(:download_chunk) { 'a' }
|
14
|
+
|
15
|
+
expect(io.read(1)).to eq('a')
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'updates #pos on read' do
|
19
|
+
allow(blob_service).to receive(:download_chunk) { 'a' }
|
20
|
+
|
21
|
+
expect { io.read(1) }.to change { io.pos }.from(0).to(1)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe '#seek' do
|
26
|
+
it 'updates @pos' do
|
27
|
+
expect { io.seek(10) }.to change { io.pos }.from(0).to(10)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe '#size' do
|
32
|
+
it 'returns the size of the blob byte_size' do
|
33
|
+
expect(io.size).to eq(blob.byte_size)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
def skip_reason
|
4
|
+
if RUBY_ENGINE == 'jruby'
|
5
|
+
'Skipping because JRuby have randon failing issue'
|
6
|
+
elsif RUBY_VERSION.to_f < 2.5
|
7
|
+
'Skipping because Rails testing script use Rails 6, who does not support Ruby bellow 2.5'
|
8
|
+
else
|
9
|
+
false
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe 'Rails app with ActiveStorage and format-parser', skip: skip_reason do
|
14
|
+
describe 'local hosting with ActiveStorage disk adapter' do
|
15
|
+
it 'parse local file with format_parser' do
|
16
|
+
clean_env do
|
17
|
+
cmd = 'ruby spec/integration/active_storage/rails_app.rb'
|
18
|
+
cmd_status = ruby_script_runner(cmd)
|
19
|
+
expect(cmd_status[:stdout].last).to match(/1 runs, 3 assertions, 0 failures, 0 errors, 0 skips/)
|
20
|
+
expect(cmd_status[:exitstatus]).to eq(0)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def ruby_script_runner(cmd)
|
26
|
+
require 'open3'
|
27
|
+
cmd_status = { stdout: [], exitstatus: nil }
|
28
|
+
Open3.popen2(cmd) do |_stdin, stdout, wait_thr|
|
29
|
+
frame_stdout do
|
30
|
+
while line = stdout.gets
|
31
|
+
puts "| #{line}"
|
32
|
+
cmd_status[:stdout] << line
|
33
|
+
end
|
34
|
+
end
|
35
|
+
cmd_status[:exitstatus] = wait_thr.value.exitstatus
|
36
|
+
end
|
37
|
+
cmd_status
|
38
|
+
end
|
39
|
+
|
40
|
+
def frame_stdout
|
41
|
+
puts
|
42
|
+
puts '-' * 50
|
43
|
+
yield
|
44
|
+
puts '-' * 50
|
45
|
+
end
|
46
|
+
|
47
|
+
def clean_env
|
48
|
+
if Bundler.respond_to?(:with_unbundled_env)
|
49
|
+
Bundler.with_unbundled_env do
|
50
|
+
yield
|
51
|
+
end
|
52
|
+
else
|
53
|
+
Bundler.with_clean_env do
|
54
|
+
yield
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -140,4 +140,30 @@ describe FormatParser::AttributesJSON do
|
|
140
140
|
JSON.pretty_generate(object_with_attributes_module)
|
141
141
|
}.to raise_error(/structure too deep/)
|
142
142
|
end
|
143
|
+
|
144
|
+
it 'converts all hash keys to string when stringify_keys: true' do
|
145
|
+
fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
|
146
|
+
fi_io = File.open(fixture_path, 'rb')
|
147
|
+
|
148
|
+
result = FormatParser::ZIPParser.new.call(fi_io).as_json(stringify_keys: true)
|
149
|
+
|
150
|
+
result['entries'].each do |entry|
|
151
|
+
entry.each do |key, _value|
|
152
|
+
expect(key).to be_a(String)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
it 'does not convert hash keys to string when stringify_keys: false' do
|
158
|
+
fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
|
159
|
+
fi_io = File.open(fixture_path, 'rb')
|
160
|
+
|
161
|
+
result = FormatParser::ZIPParser.new.call(fi_io).as_json
|
162
|
+
|
163
|
+
result['entries'].each do |entry|
|
164
|
+
entry.each do |key, _value|
|
165
|
+
expect(key).to be_a(Symbol)
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
143
169
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::HashUtils do
|
4
|
+
describe '.deep_transform_keys' do
|
5
|
+
it 'transforms all the keys in a hash' do
|
6
|
+
hash = { aa: 1, 'bb' => 2 }
|
7
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
8
|
+
|
9
|
+
expect(result).to eq('aa' => 1, 'bb' => 2)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'transforms all the keys in a array of hashes' do
|
13
|
+
array = [{ aa: 1, bb: 2 }, { cc: 3, dd: [{c: 2, d: 3}] }]
|
14
|
+
result = described_class.deep_transform_keys(array, &:to_s)
|
15
|
+
|
16
|
+
expect(result).to eq(
|
17
|
+
[{'aa' => 1, 'bb' => 2}, {'cc' => 3, 'dd' => [{'c' => 2, 'd' => 3}]}]
|
18
|
+
)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'transforms all the keys in a hash recursively' do
|
22
|
+
hash = { aa: 1, bb: { cc: 22, dd: 3 } }
|
23
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
24
|
+
|
25
|
+
expect(result).to eq('aa' => 1, 'bb' => { 'cc' => 22, 'dd' => 3})
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'does nothing for an non array/hash object' do
|
29
|
+
object = Object.new
|
30
|
+
result = described_class.deep_transform_keys(object, &:to_s)
|
31
|
+
|
32
|
+
expect(result).to eq(object)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'returns the last value if different keys are transformed into the same one' do
|
36
|
+
hash = { aa: 0, 'bb' => 2, bb: 1 }
|
37
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
38
|
+
|
39
|
+
expect(result).to eq('aa' => 0, 'bb' => 1)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'bundler/inline'
|
2
|
+
|
3
|
+
gemfile(true) do
|
4
|
+
source 'https://rubygems.org'
|
5
|
+
|
6
|
+
git_source(:github) { |repo| "https://github.com/#{repo}.git" }
|
7
|
+
|
8
|
+
gem 'rails', '6.0.3'
|
9
|
+
gem 'sqlite3'
|
10
|
+
gem 'format_parser', path: './'
|
11
|
+
end
|
12
|
+
|
13
|
+
require 'active_record/railtie'
|
14
|
+
require 'active_storage/engine'
|
15
|
+
require 'tmpdir'
|
16
|
+
|
17
|
+
class TestApp < Rails::Application
|
18
|
+
config.root = __dir__
|
19
|
+
config.hosts << 'example.org'
|
20
|
+
config.eager_load = false
|
21
|
+
config.session_store :cookie_store, key: 'cookie_store_key'
|
22
|
+
secrets.secret_key_base = 'secret_key_base'
|
23
|
+
|
24
|
+
config.logger = Logger.new('/dev/null')
|
25
|
+
|
26
|
+
config.active_storage.service = :local
|
27
|
+
config.active_storage.service_configurations = {
|
28
|
+
local: {
|
29
|
+
root: Dir.tmpdir,
|
30
|
+
service: 'Disk'
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer
|
35
|
+
end
|
36
|
+
|
37
|
+
ENV['DATABASE_URL'] = 'sqlite3::memory:'
|
38
|
+
|
39
|
+
Rails.application.initialize!
|
40
|
+
|
41
|
+
require ActiveStorage::Engine.root.join('db/migrate/20170806125915_create_active_storage_tables.rb').to_s
|
42
|
+
|
43
|
+
ActiveRecord::Schema.define do
|
44
|
+
CreateActiveStorageTables.new.change
|
45
|
+
|
46
|
+
create_table :users, force: true
|
47
|
+
end
|
48
|
+
|
49
|
+
class User < ActiveRecord::Base
|
50
|
+
has_one_attached :profile_picture
|
51
|
+
end
|
52
|
+
|
53
|
+
require 'minitest/autorun'
|
54
|
+
require 'open-uri'
|
55
|
+
|
56
|
+
describe User do
|
57
|
+
describe "profile_picture's metadatas" do
|
58
|
+
it 'parse metadatas with format_parser' do
|
59
|
+
user = User.create
|
60
|
+
user.profile_picture.attach(
|
61
|
+
filename: 'cat.png',
|
62
|
+
io: URI.open('https://freesvg.org/img/1416155153.png')
|
63
|
+
)
|
64
|
+
|
65
|
+
user.profile_picture.analyze
|
66
|
+
|
67
|
+
_(user.profile_picture.metadata[:width_px]).must_equal 500
|
68
|
+
_(user.profile_picture.metadata[:height_px]).must_equal 296
|
69
|
+
_(user.profile_picture.metadata[:color_mode]).must_equal 'rgba'
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -37,6 +37,14 @@ describe FormatParser::MP3Parser do
|
|
37
37
|
expect(parsed.album).to be_nil
|
38
38
|
end
|
39
39
|
end
|
40
|
+
|
41
|
+
context 'when has an empty tag' do
|
42
|
+
let(:fpath) { fixtures_dir + '/MP3/id3v2_with_empty_tag.mp3' }
|
43
|
+
|
44
|
+
it 'ignores the empty tags' do
|
45
|
+
expect(parsed.intrinsics[:genre]).to eq('Rock')
|
46
|
+
end
|
47
|
+
end
|
40
48
|
end
|
41
49
|
|
42
50
|
it 'decodes and estimates duration for a CBR MP3' do
|
@@ -71,6 +79,17 @@ describe FormatParser::MP3Parser do
|
|
71
79
|
expect(prepped.pos).to eq(3145738)
|
72
80
|
end
|
73
81
|
|
82
|
+
it 'does not raise error when a tag frame has unsupported encoding' do
|
83
|
+
fpath = fixtures_dir + '/MP3/id3v2_frame_with_invalid_encoding.mp3'
|
84
|
+
|
85
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
86
|
+
|
87
|
+
expect(parsed.nature). to eq(:audio)
|
88
|
+
expect(parsed.album).to eq('wetransfer')
|
89
|
+
expect(parsed.artist).to eq('wetransfer')
|
90
|
+
expect(parsed.title).to eq('test')
|
91
|
+
end
|
92
|
+
|
74
93
|
it 'parses the Cassy MP3' do
|
75
94
|
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
76
95
|
parsed = subject.call(File.open(fpath, 'rb'))
|
@@ -110,4 +129,32 @@ describe FormatParser::MP3Parser do
|
|
110
129
|
subject.call(StringIO.new(''))
|
111
130
|
}.to raise_error(FormatParser::IOUtils::InvalidRead)
|
112
131
|
end
|
132
|
+
|
133
|
+
describe '#as_json' do
|
134
|
+
it 'converts all hash keys to string when stringify_keys: true' do
|
135
|
+
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
136
|
+
result = subject.call(File.open(fpath, 'rb')).as_json(stringify_keys: true)
|
137
|
+
|
138
|
+
expect(
|
139
|
+
result['intrinsics'].keys.map(&:class).uniq
|
140
|
+
).to eq([String])
|
141
|
+
|
142
|
+
expect(
|
143
|
+
result['intrinsics']['id3tags'].map(&:class).uniq
|
144
|
+
).to eq([ID3Tag::Tag])
|
145
|
+
end
|
146
|
+
|
147
|
+
it 'does not convert the hash keys to string when stringify_keys: false' do
|
148
|
+
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
149
|
+
result = subject.call(File.open(fpath, 'rb')).as_json
|
150
|
+
|
151
|
+
expect(
|
152
|
+
result['intrinsics'].keys.map(&:class).uniq
|
153
|
+
).to eq([Symbol])
|
154
|
+
|
155
|
+
expect(
|
156
|
+
result['intrinsics'][:id3tags].map(&:class).uniq
|
157
|
+
).to eq([ID3Tag::Tag])
|
158
|
+
end
|
159
|
+
end
|
113
160
|
end
|
@@ -47,12 +47,15 @@ describe FormatParser::TIFFParser do
|
|
47
47
|
expect(parsed.intrinsics[:exif]).not_to be_nil
|
48
48
|
end
|
49
49
|
|
50
|
-
it '
|
50
|
+
it 'parses Sony ARW fixture as arw format file' do
|
51
51
|
arw_path = fixtures_dir + '/ARW/RAW_SONY_ILCE-7RM2.ARW'
|
52
52
|
|
53
53
|
parsed = subject.call(File.open(arw_path, 'rb'))
|
54
54
|
|
55
55
|
expect(parsed).not_to be_nil
|
56
|
+
expect(parsed.nature).to eq(:image)
|
57
|
+
expect(parsed.format).to eq(:arw)
|
58
|
+
|
56
59
|
expect(parsed.width_px).to eq(7952)
|
57
60
|
expect(parsed.height_px).to eq(5304)
|
58
61
|
expect(parsed.intrinsics[:exif]).not_to be_nil
|
@@ -103,4 +103,11 @@ describe FormatParser::ZIPParser do
|
|
103
103
|
expect(first_entry.filename).to eq('Li��nia Extreme//')
|
104
104
|
expect(first_entry.type).to eq(:directory)
|
105
105
|
end
|
106
|
+
|
107
|
+
it 'is able to handle files with invalid central directory position' do
|
108
|
+
invalid_zip_path = fixtures_dir + '/ZIP/invalid_central_directory.zip'
|
109
|
+
|
110
|
+
expect { subject.call(File.open(invalid_zip_path, 'rb')) }
|
111
|
+
.to_not raise_error
|
112
|
+
end
|
106
113
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -8,7 +8,6 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
8
|
|
9
9
|
require 'rspec'
|
10
10
|
require 'format_parser'
|
11
|
-
require 'pry'
|
12
11
|
|
13
12
|
module SpecHelpers
|
14
13
|
def fixtures_dir
|
@@ -19,6 +18,8 @@ end
|
|
19
18
|
RSpec.configure do |c|
|
20
19
|
c.include SpecHelpers
|
21
20
|
c.extend SpecHelpers # makes fixtures_dir available for example groups too
|
21
|
+
# https://relishapp.com/rspec/rspec-core/docs/command-line/only-failures
|
22
|
+
c.example_status_persistence_file_path = 'spec/examples.txt'
|
22
23
|
end
|
23
24
|
|
24
25
|
RSpec.shared_examples 'an IO object compatible with IOConstraint' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.24.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2020-
|
12
|
+
date: 2020-09-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
version: '1'
|
35
35
|
- - ">="
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 1.3.
|
37
|
+
version: 1.3.7
|
38
38
|
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,27 +44,21 @@ dependencies:
|
|
44
44
|
version: '1'
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.3.
|
47
|
+
version: 1.3.7
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: id3tag
|
50
50
|
requirement: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.
|
55
|
-
- - ">="
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
version: 0.10.1
|
54
|
+
version: '0.13'
|
58
55
|
type: :runtime
|
59
56
|
prerelease: false
|
60
57
|
version_requirements: !ruby/object:Gem::Requirement
|
61
58
|
requirements:
|
62
59
|
- - "~>"
|
63
60
|
- !ruby/object:Gem::Version
|
64
|
-
version: '0.
|
65
|
-
- - ">="
|
66
|
-
- !ruby/object:Gem::Version
|
67
|
-
version: 0.10.1
|
61
|
+
version: '0.13'
|
68
62
|
- !ruby/object:Gem::Dependency
|
69
63
|
name: faraday
|
70
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -135,20 +129,6 @@ dependencies:
|
|
135
129
|
- - "~>"
|
136
130
|
- !ruby/object:Gem::Version
|
137
131
|
version: '0.15'
|
138
|
-
- !ruby/object:Gem::Dependency
|
139
|
-
name: pry
|
140
|
-
requirement: !ruby/object:Gem::Requirement
|
141
|
-
requirements:
|
142
|
-
- - "~>"
|
143
|
-
- !ruby/object:Gem::Version
|
144
|
-
version: '0.11'
|
145
|
-
type: :development
|
146
|
-
prerelease: false
|
147
|
-
version_requirements: !ruby/object:Gem::Requirement
|
148
|
-
requirements:
|
149
|
-
- - "~>"
|
150
|
-
- !ruby/object:Gem::Version
|
151
|
-
version: '0.11'
|
152
132
|
- !ruby/object:Gem::Dependency
|
153
133
|
name: yard
|
154
134
|
requirement: !ruby/object:Gem::Requirement
|
@@ -216,6 +196,8 @@ files:
|
|
216
196
|
- Rakefile
|
217
197
|
- exe/format_parser_inspect
|
218
198
|
- format_parser.gemspec
|
199
|
+
- lib/active_storage/blob_analyzer.rb
|
200
|
+
- lib/active_storage/blob_io.rb
|
219
201
|
- lib/archive.rb
|
220
202
|
- lib/attributes_json.rb
|
221
203
|
- lib/audio.rb
|
@@ -223,6 +205,7 @@ files:
|
|
223
205
|
- lib/document.rb
|
224
206
|
- lib/format_parser.rb
|
225
207
|
- lib/format_parser/version.rb
|
208
|
+
- lib/hash_utils.rb
|
226
209
|
- lib/image.rb
|
227
210
|
- lib/io_constraint.rb
|
228
211
|
- lib/io_utils.rb
|
@@ -254,12 +237,16 @@ files:
|
|
254
237
|
- lib/read_limits_config.rb
|
255
238
|
- lib/remote_io.rb
|
256
239
|
- lib/video.rb
|
240
|
+
- spec/active_storage/blob_io_spec.rb
|
241
|
+
- spec/active_storage/rails_app_spec.rb
|
257
242
|
- spec/attributes_json_spec.rb
|
258
243
|
- spec/care_spec.rb
|
259
244
|
- spec/esoteric_formats_spec.rb
|
260
245
|
- spec/file_information_spec.rb
|
261
246
|
- spec/format_parser_inspect_spec.rb
|
262
247
|
- spec/format_parser_spec.rb
|
248
|
+
- spec/hash_utils_spec.rb
|
249
|
+
- spec/integration/active_storage/rails_app.rb
|
263
250
|
- spec/io_utils_spec.rb
|
264
251
|
- spec/parsers/aiff_parser_spec.rb
|
265
252
|
- spec/parsers/bmp_parser_spec.rb
|
@@ -305,7 +292,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
305
292
|
- !ruby/object:Gem::Version
|
306
293
|
version: '0'
|
307
294
|
requirements: []
|
308
|
-
rubygems_version: 3.0.
|
295
|
+
rubygems_version: 3.0.6
|
309
296
|
signing_key:
|
310
297
|
specification_version: 4
|
311
298
|
summary: A library for efficient parsing of file metadata
|