format_parser 0.22.1 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/format_parser.gemspec +1 -1
- data/lib/active_storage/blob_analyzer.rb +35 -0
- data/lib/active_storage/blob_io.rb +51 -0
- data/lib/format_parser.rb +1 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/mp3_parser.rb +11 -2
- data/spec/active_storage/blob_io_spec.rb +36 -0
- data/spec/active_storage/rails_app_spec.rb +58 -0
- data/spec/integration/active_storage/rails_app.rb +72 -0
- data/spec/parsers/mp3_parser_spec.rb +19 -0
- metadata +9 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dddac3718ccf02324f4632adc68ad87d08ede00c3bbe64d85689dc8a6a06ad3a
|
4
|
+
data.tar.gz: 3ca9fb36416dffbd6fd1825f15ee0fe9b590633c959a165018f0c60f8d965361
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9193257f175a36087bc4f780659ce4e7bdefbbec456f38015b588ef9b378c71e5601c2d5de0f801297e6e968d70ed3e8db4a1fb0d8a2c78b90f8f9b328cfc43a
|
7
|
+
data.tar.gz: f181cbae4d8261e05ad3ed4787f9376c3738ba9487258b7d3885dcebe6166409578c82e2e5136c19b47b61ba4fc0d730f752b71a14e19cc6cea79ecdedc357e6
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## 0.23.0
|
2
|
+
* Add ActiveStorage analyzer which can analyze ActiveStorage blobs. Enable it by setting
|
3
|
+
`config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer`
|
4
|
+
* Ignore empty ID3 tags and do not allow them to overwrite others
|
5
|
+
* Update the id3tag dependency so that we can fall back to UTF-8 instead of raising an error when parsing
|
6
|
+
MP3 files
|
7
|
+
|
1
8
|
## 0.22.1
|
2
9
|
* Fix Zip parser to not raise error for invalid zip files, with an invalid central directory
|
3
10
|
|
data/format_parser.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
|
|
32
32
|
|
33
33
|
spec.add_dependency 'ks', '~> 0.0'
|
34
34
|
spec.add_dependency 'exifr', '~> 1', '>= 1.3.4'
|
35
|
-
spec.add_dependency 'id3tag', '~> 0.10', '>= 0.10.1'
|
35
|
+
spec.add_dependency 'id3tag', '~> 0.13'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
37
|
spec.add_dependency 'measurometer', '~> 1'
|
38
38
|
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative 'blob_io'
|
2
|
+
|
3
|
+
# An analyzer class that can be hooked to ActiveStorage, in order to enable
|
4
|
+
# FormatParser to do the blob analysis instead of ActiveStorage's built-in analyzers.
|
5
|
+
# Invoked if properly integrated in Rails initializer.
|
6
|
+
|
7
|
+
module FormatParser
|
8
|
+
module ActiveStorage
|
9
|
+
class BlobAnalyzer
|
10
|
+
# FormatParser is able to handle a lot of formats, so by default it accepts all files
|
11
|
+
#
|
12
|
+
# @return [Boolean, true] always return true
|
13
|
+
def self.accept?(_blob)
|
14
|
+
true
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(blob)
|
18
|
+
@blob = blob
|
19
|
+
end
|
20
|
+
|
21
|
+
# @return [Hash] file metadatas
|
22
|
+
def metadata
|
23
|
+
io = BlobIO.new(@blob)
|
24
|
+
parsed_file = FormatParser.parse(io)
|
25
|
+
|
26
|
+
if parsed_file
|
27
|
+
# We symbolize keys because of existing output hash format of ImageAnalyzer
|
28
|
+
parsed_file.as_json.symbolize_keys
|
29
|
+
else
|
30
|
+
logger.info "Skipping file analysis because FormatParser doesn't support the file"
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Acts as a proxy to turn ActiveStorage file into IO object
|
2
|
+
|
3
|
+
module FormatParser
|
4
|
+
module ActiveStorage
|
5
|
+
class BlobIO
|
6
|
+
# @param blob[ActiveStorage::Blob] the file with linked service
|
7
|
+
# @return [BlobIO]
|
8
|
+
def initialize(blob)
|
9
|
+
@blob = blob
|
10
|
+
@service = blob.service
|
11
|
+
@pos = 0
|
12
|
+
end
|
13
|
+
|
14
|
+
# Emulates IO#read, but requires the number of bytes to read.
|
15
|
+
# Rely on `ActiveStorage::Service.download_chunk` of each hosting type (local, S3, Azure, etc)
|
16
|
+
#
|
17
|
+
# @param n_bytes[Integer] how many bytes to read
|
18
|
+
# @return [String] the read bytes
|
19
|
+
def read(n_bytes)
|
20
|
+
# HTTP ranges are inclusive on both ends, hence the `- 1` on the last byte offset.
|
21
|
+
http_range = (@pos..(@pos + n_bytes - 1))
|
22
|
+
body = @service.download_chunk(@blob.key, http_range)
|
23
|
+
@pos += body.bytesize
|
24
|
+
body.force_encoding(Encoding::ASCII_8BIT)
|
25
|
+
end
|
26
|
+
|
27
|
+
# Emulates IO#seek
|
28
|
+
#
|
29
|
+
# @param [Integer] offset size
|
30
|
+
# @return [Integer] always return 0, `seek` only mutates `pos` attribute
|
31
|
+
def seek(offset)
|
32
|
+
@pos = offset
|
33
|
+
0
|
34
|
+
end
|
35
|
+
|
36
|
+
# Emulates IO#size.
|
37
|
+
#
|
38
|
+
# @return [Integer] the byte size of the blob as reported by ActiveStorage
|
39
|
+
def size
|
40
|
+
@blob.byte_size
|
41
|
+
end
|
42
|
+
|
43
|
+
# Emulates IO#pos
|
44
|
+
#
|
45
|
+
# @return [Integer] the current offset (in bytes) of the io
|
46
|
+
def pos
|
47
|
+
@pos
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
data/lib/format_parser.rb
CHANGED
@@ -18,6 +18,7 @@ module FormatParser
|
|
18
18
|
require_relative 'remote_io'
|
19
19
|
require_relative 'io_constraint'
|
20
20
|
require_relative 'care'
|
21
|
+
require_relative 'active_storage/blob_analyzer'
|
21
22
|
|
22
23
|
# Define Measurometer in the internal namespace as well
|
23
24
|
# so that we stay compatible for the applications that use it
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -44,7 +44,7 @@ class FormatParser::MP3Parser
|
|
44
44
|
tag = __getobj__
|
45
45
|
MEMBERS.each_with_object({}) do |k, h|
|
46
46
|
value = tag.public_send(k)
|
47
|
-
h[k] = value if value
|
47
|
+
h[k] = value if value && !value.empty?
|
48
48
|
end
|
49
49
|
end
|
50
50
|
end
|
@@ -81,7 +81,7 @@ class FormatParser::MP3Parser
|
|
81
81
|
|
82
82
|
first_frame = initial_frames.first
|
83
83
|
|
84
|
-
id3tags_hash = blend_id3_tags_into_hash(*tags)
|
84
|
+
id3tags_hash = with_id3tag_local_configs { blend_id3_tags_into_hash(*tags) }
|
85
85
|
|
86
86
|
file_info = FormatParser::Audio.new(
|
87
87
|
format: :mp3,
|
@@ -293,5 +293,14 @@ class FormatParser::MP3Parser
|
|
293
293
|
attrs
|
294
294
|
end
|
295
295
|
|
296
|
+
def with_id3tag_local_configs
|
297
|
+
ID3Tag.local_configuration do |c|
|
298
|
+
c.string_encode_options = { invalid: :replace, undef: :replace }
|
299
|
+
c.source_encoding_fallback = Encoding::UTF_8
|
300
|
+
|
301
|
+
yield
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
296
305
|
FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 99
|
297
306
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::ActiveStorage::BlobIO do
|
4
|
+
let(:blob_service) { double }
|
5
|
+
let(:blob) { double(key: 'blob_key', service: blob_service, byte_size: 43000) }
|
6
|
+
let(:io) { described_class.new(blob) }
|
7
|
+
let(:fixture_path) { fixtures_dir + '/test.png' }
|
8
|
+
|
9
|
+
it_behaves_like 'an IO object compatible with IOConstraint'
|
10
|
+
|
11
|
+
describe '#read' do
|
12
|
+
it 'reads io using download_chunk from ActiveStorage#Service' do
|
13
|
+
allow(blob_service).to receive(:download_chunk) { 'a' }
|
14
|
+
|
15
|
+
expect(io.read(1)).to eq('a')
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'updates #pos on read' do
|
19
|
+
allow(blob_service).to receive(:download_chunk) { 'a' }
|
20
|
+
|
21
|
+
expect { io.read(1) }.to change { io.pos }.from(0).to(1)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe '#seek' do
|
26
|
+
it 'updates @pos' do
|
27
|
+
expect { io.seek(10) }.to change { io.pos }.from(0).to(10)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe '#size' do
|
32
|
+
it 'returns the size of the blob byte_size' do
|
33
|
+
expect(io.size).to eq(blob.byte_size)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
def skip_reason
|
4
|
+
if RUBY_ENGINE == 'jruby'
|
5
|
+
'Skipping because JRuby have randon failing issue'
|
6
|
+
elsif RUBY_VERSION.to_f < 2.5
|
7
|
+
'Skipping because Rails testing script use Rails 6, who does not support Ruby bellow 2.5'
|
8
|
+
else
|
9
|
+
false
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe 'Rails app with ActiveStorage and format-parser', skip: skip_reason do
|
14
|
+
describe 'local hosting with ActiveStorage disk adapter' do
|
15
|
+
it 'parse local file with format_parser' do
|
16
|
+
clean_env do
|
17
|
+
cmd = 'ruby spec/integration/active_storage/rails_app.rb'
|
18
|
+
cmd_status = ruby_script_runner(cmd)
|
19
|
+
expect(cmd_status[:stdout].last).to match(/1 runs, 3 assertions, 0 failures, 0 errors, 0 skips/)
|
20
|
+
expect(cmd_status[:exitstatus]).to eq(0)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def ruby_script_runner(cmd)
|
26
|
+
require 'open3'
|
27
|
+
cmd_status = { stdout: [], exitstatus: nil }
|
28
|
+
Open3.popen2(cmd) do |_stdin, stdout, wait_thr|
|
29
|
+
frame_stdout do
|
30
|
+
while line = stdout.gets
|
31
|
+
puts "| #{line}"
|
32
|
+
cmd_status[:stdout] << line
|
33
|
+
end
|
34
|
+
end
|
35
|
+
cmd_status[:exitstatus] = wait_thr.value.exitstatus
|
36
|
+
end
|
37
|
+
cmd_status
|
38
|
+
end
|
39
|
+
|
40
|
+
def frame_stdout
|
41
|
+
puts
|
42
|
+
puts '-' * 50
|
43
|
+
yield
|
44
|
+
puts '-' * 50
|
45
|
+
end
|
46
|
+
|
47
|
+
def clean_env
|
48
|
+
if Bundler.respond_to?(:with_unbundled_env)
|
49
|
+
Bundler.with_unbundled_env do
|
50
|
+
yield
|
51
|
+
end
|
52
|
+
else
|
53
|
+
Bundler.with_clean_env do
|
54
|
+
yield
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'bundler/inline'
|
2
|
+
|
3
|
+
gemfile(true) do
|
4
|
+
source 'https://rubygems.org'
|
5
|
+
|
6
|
+
git_source(:github) { |repo| "https://github.com/#{repo}.git" }
|
7
|
+
|
8
|
+
gem 'rails', '6.0.3'
|
9
|
+
gem 'sqlite3'
|
10
|
+
gem 'format_parser', path: './'
|
11
|
+
end
|
12
|
+
|
13
|
+
require 'active_record/railtie'
|
14
|
+
require 'active_storage/engine'
|
15
|
+
require 'tmpdir'
|
16
|
+
|
17
|
+
class TestApp < Rails::Application
|
18
|
+
config.root = __dir__
|
19
|
+
config.hosts << 'example.org'
|
20
|
+
config.eager_load = false
|
21
|
+
config.session_store :cookie_store, key: 'cookie_store_key'
|
22
|
+
secrets.secret_key_base = 'secret_key_base'
|
23
|
+
|
24
|
+
config.logger = Logger.new('/dev/null')
|
25
|
+
|
26
|
+
config.active_storage.service = :local
|
27
|
+
config.active_storage.service_configurations = {
|
28
|
+
local: {
|
29
|
+
root: Dir.tmpdir,
|
30
|
+
service: 'Disk'
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer
|
35
|
+
end
|
36
|
+
|
37
|
+
ENV['DATABASE_URL'] = 'sqlite3::memory:'
|
38
|
+
|
39
|
+
Rails.application.initialize!
|
40
|
+
|
41
|
+
require ActiveStorage::Engine.root.join('db/migrate/20170806125915_create_active_storage_tables.rb').to_s
|
42
|
+
|
43
|
+
ActiveRecord::Schema.define do
|
44
|
+
CreateActiveStorageTables.new.change
|
45
|
+
|
46
|
+
create_table :users, force: true
|
47
|
+
end
|
48
|
+
|
49
|
+
class User < ActiveRecord::Base
|
50
|
+
has_one_attached :profile_picture
|
51
|
+
end
|
52
|
+
|
53
|
+
require 'minitest/autorun'
|
54
|
+
require 'open-uri'
|
55
|
+
|
56
|
+
describe User do
|
57
|
+
describe "profile_picture's metadatas" do
|
58
|
+
it 'parse metadatas with format_parser' do
|
59
|
+
user = User.create
|
60
|
+
user.profile_picture.attach(
|
61
|
+
filename: 'cat.png',
|
62
|
+
io: URI.open('https://freesvg.org/img/1416155153.png')
|
63
|
+
)
|
64
|
+
|
65
|
+
user.profile_picture.analyze
|
66
|
+
|
67
|
+
_(user.profile_picture.metadata[:width_px]).must_equal 500
|
68
|
+
_(user.profile_picture.metadata[:height_px]).must_equal 296
|
69
|
+
_(user.profile_picture.metadata[:color_mode]).must_equal 'rgba'
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -37,6 +37,14 @@ describe FormatParser::MP3Parser do
|
|
37
37
|
expect(parsed.album).to be_nil
|
38
38
|
end
|
39
39
|
end
|
40
|
+
|
41
|
+
context 'when has an empty tag' do
|
42
|
+
let(:fpath) { fixtures_dir + '/MP3/id3v2_with_empty_tag.mp3' }
|
43
|
+
|
44
|
+
it 'ignores the empty tags' do
|
45
|
+
expect(parsed.intrinsics[:genre]).to eq('Rock')
|
46
|
+
end
|
47
|
+
end
|
40
48
|
end
|
41
49
|
|
42
50
|
it 'decodes and estimates duration for a CBR MP3' do
|
@@ -71,6 +79,17 @@ describe FormatParser::MP3Parser do
|
|
71
79
|
expect(prepped.pos).to eq(3145738)
|
72
80
|
end
|
73
81
|
|
82
|
+
it 'does not raise error when a tag frame has unsupported encoding' do
|
83
|
+
fpath = fixtures_dir + '/MP3/id3v2_frame_with_invalid_encoding.mp3'
|
84
|
+
|
85
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
86
|
+
|
87
|
+
expect(parsed.nature). to eq(:audio)
|
88
|
+
expect(parsed.album).to eq('wetransfer')
|
89
|
+
expect(parsed.artist).to eq('wetransfer')
|
90
|
+
expect(parsed.title).to eq('test')
|
91
|
+
end
|
92
|
+
|
74
93
|
it 'parses the Cassy MP3' do
|
75
94
|
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
76
95
|
parsed = subject.call(File.open(fpath, 'rb'))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.22.1
|
4
|
+
version: 0.23.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2020-08
|
12
|
+
date: 2020-09-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -51,20 +51,14 @@ dependencies:
|
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.10'
|
55
|
-
- - ">="
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
version: 0.10.1
|
54
|
+
version: '0.13'
|
58
55
|
type: :runtime
|
59
56
|
prerelease: false
|
60
57
|
version_requirements: !ruby/object:Gem::Requirement
|
61
58
|
requirements:
|
62
59
|
- - "~>"
|
63
60
|
- !ruby/object:Gem::Version
|
64
|
-
version: '0.10'
|
65
|
-
- - ">="
|
66
|
-
- !ruby/object:Gem::Version
|
67
|
-
version: 0.10.1
|
61
|
+
version: '0.13'
|
68
62
|
- !ruby/object:Gem::Dependency
|
69
63
|
name: faraday
|
70
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -202,6 +196,8 @@ files:
|
|
202
196
|
- Rakefile
|
203
197
|
- exe/format_parser_inspect
|
204
198
|
- format_parser.gemspec
|
199
|
+
- lib/active_storage/blob_analyzer.rb
|
200
|
+
- lib/active_storage/blob_io.rb
|
205
201
|
- lib/archive.rb
|
206
202
|
- lib/attributes_json.rb
|
207
203
|
- lib/audio.rb
|
@@ -241,6 +237,8 @@ files:
|
|
241
237
|
- lib/read_limits_config.rb
|
242
238
|
- lib/remote_io.rb
|
243
239
|
- lib/video.rb
|
240
|
+
- spec/active_storage/blob_io_spec.rb
|
241
|
+
- spec/active_storage/rails_app_spec.rb
|
244
242
|
- spec/attributes_json_spec.rb
|
245
243
|
- spec/care_spec.rb
|
246
244
|
- spec/esoteric_formats_spec.rb
|
@@ -248,6 +246,7 @@ files:
|
|
248
246
|
- spec/format_parser_inspect_spec.rb
|
249
247
|
- spec/format_parser_spec.rb
|
250
248
|
- spec/hash_utils_spec.rb
|
249
|
+
- spec/integration/active_storage/rails_app.rb
|
251
250
|
- spec/io_utils_spec.rb
|
252
251
|
- spec/parsers/aiff_parser_spec.rb
|
253
252
|
- spec/parsers/bmp_parser_spec.rb
|