format_parser 0.22.1 → 0.23.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 05470fef0edec68c427b9680cf94a88a8f3286e2279b2daf13e87679324eb061
4
- data.tar.gz: f65ed1d390e4e70bd34a3b01b5d54d39e4076d18ebf15fc20196c2746c86122f
3
+ metadata.gz: dddac3718ccf02324f4632adc68ad87d08ede00c3bbe64d85689dc8a6a06ad3a
4
+ data.tar.gz: 3ca9fb36416dffbd6fd1825f15ee0fe9b590633c959a165018f0c60f8d965361
5
5
  SHA512:
6
- metadata.gz: e6e9e2d1fbfe6e813d1e0d9d2c102ce3f031fd37395cc75807ca7b527b1a2fc4a9a3c41baaf9e2cdaa601063cded92ef446ce913f2210f3d82e063126873d54d
7
- data.tar.gz: 86e166b2ac754fe5d7e42e471bcaa70cd54ae0aa22da3e0f85a6223213c2bd5a284f2b53416d03656dc291898837636678dbfba2e32b179a562ea7c8b05ef0bf
6
+ metadata.gz: 9193257f175a36087bc4f780659ce4e7bdefbbec456f38015b588ef9b378c71e5601c2d5de0f801297e6e968d70ed3e8db4a1fb0d8a2c78b90f8f9b328cfc43a
7
+ data.tar.gz: f181cbae4d8261e05ad3ed4787f9376c3738ba9487258b7d3885dcebe6166409578c82e2e5136c19b47b61ba4fc0d730f752b71a14e19cc6cea79ecdedc357e6
@@ -1,3 +1,10 @@
1
+ ## 0.23.0
2
+ * Add ActiveStorage analyzer which can analyze ActiveStorage blobs. Enable it by setting
3
+ `config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer`
4
+ * Ignore empty ID3 tags and do not allow them to overwrite others
5
+ * Update the id3tag dependency so that we can fall back to UTF-8 instead of raising an error when parsing
6
+ MP3 files
7
+
1
8
  ## 0.22.1
2
9
  * Fix Zip parser to not raise error for invalid zip files, with an invalid central directory
3
10
 
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
32
32
 
33
33
  spec.add_dependency 'ks', '~> 0.0'
34
34
  spec.add_dependency 'exifr', '~> 1', '>= 1.3.4'
35
- spec.add_dependency 'id3tag', '~> 0.10', '>= 0.10.1'
35
+ spec.add_dependency 'id3tag', '~> 0.13'
36
36
  spec.add_dependency 'faraday', '~> 0.13'
37
37
  spec.add_dependency 'measurometer', '~> 1'
38
38
 
@@ -0,0 +1,35 @@
1
+ require_relative 'blob_io'
2
+
3
+ # An analyzer class that can be hooked to ActiveStorage, in order to enable
4
+ # FormatParser to do the blob analysis instead of ActiveStorage's built-in analyzers.
5
+ # It is invoked only if it has been properly integrated in a Rails initializer.
6
+
7
+ module FormatParser
8
+ module ActiveStorage
9
+ class BlobAnalyzer
10
+ # FormatParser is able to handle a lot of formats, so by default it accepts all files.
11
+ #
12
+ # @return [Boolean] always returns true
13
+ def self.accept?(_blob)
14
+ true
15
+ end
16
+
17
+ def initialize(blob)
18
+ @blob = blob
19
+ end
20
+
21
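+ # @return [Hash] file metadata with symbolized keys; NOTE(review): for unsupported files this returns the result of `logger.info` instead of a Hash — confirm callers accept that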
22
+ def metadata
23
+ io = BlobIO.new(@blob)
24
+ parsed_file = FormatParser.parse(io)
25
+
26
+ if parsed_file
27
+ # We symbolize keys because of existing output hash format of ImageAnalyzer
28
+ parsed_file.as_json.symbolize_keys
29
+ else
30
+ logger.info "Skipping file analysis because FormatParser doesn't support the file"
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,51 @@
1
+ # Acts as a proxy that turns an ActiveStorage blob into an IO-like object
2
+
3
+ module FormatParser
4
+ module ActiveStorage
5
+ class BlobIO
6
+ # @param blob[ActiveStorage::Blob] the file with linked service
7
+ # @return [BlobIO]
8
+ def initialize(blob)
9
+ @blob = blob
10
+ @service = blob.service
11
+ @pos = 0
12
+ end
13
+
14
+ # Emulates IO#read, but requires the number of bytes to read.
15
+ # Relies on the `ActiveStorage::Service#download_chunk` implementation of each hosting type (local, S3, Azure, etc.)
16
+ #
17
+ # @param n_bytes[Integer] how many bytes to read
18
+ # @return [String] the read bytes
19
+ def read(n_bytes)
20
+ # HTTP byte ranges are inclusive on both ends, hence the `- 1` on the upper bound.
21
+ http_range = (@pos..(@pos + n_bytes - 1))
22
+ body = @service.download_chunk(@blob.key, http_range)
23
+ @pos += body.bytesize
24
+ body.force_encoding(Encoding::ASCII_8BIT)
25
+ end
26
+
27
+ # Emulates IO#seek
28
+ #
29
+ # @param offset[Integer] the absolute position (in bytes) to move to
30
+ # @return [Integer] always returns 0; `seek` only updates the `@pos` attribute
31
+ def seek(offset)
32
+ @pos = offset
33
+ 0
34
+ end
35
+
36
+ # Emulates IO#size.
37
+ #
38
+ # @return [Integer] the byte size of the blob, as reported by ActiveStorage
39
+ def size
40
+ @blob.byte_size
41
+ end
42
+
43
+ # Emulates IO#pos
44
+ #
45
+ # @return [Integer] the current offset (in bytes) of the io
46
+ def pos
47
+ @pos
48
+ end
49
+ end
50
+ end
51
+ end
@@ -18,6 +18,7 @@ module FormatParser
18
18
  require_relative 'remote_io'
19
19
  require_relative 'io_constraint'
20
20
  require_relative 'care'
21
+ require_relative 'active_storage/blob_analyzer'
21
22
 
22
23
  # Define Measurometer in the internal namespace as well
23
24
  # so that we stay compatible for the applications that use it
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.22.1'
2
+ VERSION = '0.23.0'
3
3
  end
@@ -44,7 +44,7 @@ class FormatParser::MP3Parser
44
44
  tag = __getobj__
45
45
  MEMBERS.each_with_object({}) do |k, h|
46
46
  value = tag.public_send(k)
47
- h[k] = value if value
47
+ h[k] = value if value && !value.empty?
48
48
  end
49
49
  end
50
50
  end
@@ -81,7 +81,7 @@ class FormatParser::MP3Parser
81
81
 
82
82
  first_frame = initial_frames.first
83
83
 
84
- id3tags_hash = blend_id3_tags_into_hash(*tags)
84
+ id3tags_hash = with_id3tag_local_configs { blend_id3_tags_into_hash(*tags) }
85
85
 
86
86
  file_info = FormatParser::Audio.new(
87
87
  format: :mp3,
@@ -293,5 +293,14 @@ class FormatParser::MP3Parser
293
293
  attrs
294
294
  end
295
295
 
296
+ def with_id3tag_local_configs
297
+ ID3Tag.local_configuration do |c|
298
+ c.string_encode_options = { invalid: :replace, undef: :replace }
299
+ c.source_encoding_fallback = Encoding::UTF_8
300
+
301
+ yield
302
+ end
303
+ end
304
+
296
305
  FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 99
297
306
  end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::ActiveStorage::BlobIO do
4
+ let(:blob_service) { double }
5
+ let(:blob) { double(key: 'blob_key', service: blob_service, byte_size: 43000) }
6
+ let(:io) { described_class.new(blob) }
7
+ let(:fixture_path) { fixtures_dir + '/test.png' }
8
+
9
+ it_behaves_like 'an IO object compatible with IOConstraint'
10
+
11
+ describe '#read' do
12
+ it 'reads io using download_chunk from ActiveStorage#Service' do
13
+ allow(blob_service).to receive(:download_chunk) { 'a' }
14
+
15
+ expect(io.read(1)).to eq('a')
16
+ end
17
+
18
+ it 'updates #pos on read' do
19
+ allow(blob_service).to receive(:download_chunk) { 'a' }
20
+
21
+ expect { io.read(1) }.to change { io.pos }.from(0).to(1)
22
+ end
23
+ end
24
+
25
+ describe '#seek' do
26
+ it 'updates @pos' do
27
+ expect { io.seek(10) }.to change { io.pos }.from(0).to(10)
28
+ end
29
+ end
30
+
31
+ describe '#size' do
32
+ it 'returns the size of the blob byte_size' do
33
+ expect(io.size).to eq(blob.byte_size)
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,58 @@
1
+ require 'spec_helper'
2
+
3
+ def skip_reason
4
+ if RUBY_ENGINE == 'jruby'
5
+ 'Skipping because JRuby have randon failing issue'
6
+ elsif RUBY_VERSION.to_f < 2.5
7
+ 'Skipping because Rails testing script use Rails 6, who does not support Ruby bellow 2.5'
8
+ else
9
+ false
10
+ end
11
+ end
12
+
13
+ describe 'Rails app with ActiveStorage and format-parser', skip: skip_reason do
14
+ describe 'local hosting with ActiveStorage disk adapter' do
15
+ it 'parse local file with format_parser' do
16
+ clean_env do
17
+ cmd = 'ruby spec/integration/active_storage/rails_app.rb'
18
+ cmd_status = ruby_script_runner(cmd)
19
+ expect(cmd_status[:stdout].last).to match(/1 runs, 3 assertions, 0 failures, 0 errors, 0 skips/)
20
+ expect(cmd_status[:exitstatus]).to eq(0)
21
+ end
22
+ end
23
+ end
24
+
25
+ def ruby_script_runner(cmd)
26
+ require 'open3'
27
+ cmd_status = { stdout: [], exitstatus: nil }
28
+ Open3.popen2(cmd) do |_stdin, stdout, wait_thr|
29
+ frame_stdout do
30
+ while line = stdout.gets
31
+ puts "| #{line}"
32
+ cmd_status[:stdout] << line
33
+ end
34
+ end
35
+ cmd_status[:exitstatus] = wait_thr.value.exitstatus
36
+ end
37
+ cmd_status
38
+ end
39
+
40
+ def frame_stdout
41
+ puts
42
+ puts '-' * 50
43
+ yield
44
+ puts '-' * 50
45
+ end
46
+
47
+ def clean_env
48
+ if Bundler.respond_to?(:with_unbundled_env)
49
+ Bundler.with_unbundled_env do
50
+ yield
51
+ end
52
+ else
53
+ Bundler.with_clean_env do
54
+ yield
55
+ end
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,72 @@
1
+ require 'bundler/inline'
2
+
3
+ gemfile(true) do
4
+ source 'https://rubygems.org'
5
+
6
+ git_source(:github) { |repo| "https://github.com/#{repo}.git" }
7
+
8
+ gem 'rails', '6.0.3'
9
+ gem 'sqlite3'
10
+ gem 'format_parser', path: './'
11
+ end
12
+
13
+ require 'active_record/railtie'
14
+ require 'active_storage/engine'
15
+ require 'tmpdir'
16
+
17
+ class TestApp < Rails::Application
18
+ config.root = __dir__
19
+ config.hosts << 'example.org'
20
+ config.eager_load = false
21
+ config.session_store :cookie_store, key: 'cookie_store_key'
22
+ secrets.secret_key_base = 'secret_key_base'
23
+
24
+ config.logger = Logger.new('/dev/null')
25
+
26
+ config.active_storage.service = :local
27
+ config.active_storage.service_configurations = {
28
+ local: {
29
+ root: Dir.tmpdir,
30
+ service: 'Disk'
31
+ }
32
+ }
33
+
34
+ config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer
35
+ end
36
+
37
+ ENV['DATABASE_URL'] = 'sqlite3::memory:'
38
+
39
+ Rails.application.initialize!
40
+
41
+ require ActiveStorage::Engine.root.join('db/migrate/20170806125915_create_active_storage_tables.rb').to_s
42
+
43
+ ActiveRecord::Schema.define do
44
+ CreateActiveStorageTables.new.change
45
+
46
+ create_table :users, force: true
47
+ end
48
+
49
+ class User < ActiveRecord::Base
50
+ has_one_attached :profile_picture
51
+ end
52
+
53
+ require 'minitest/autorun'
54
+ require 'open-uri'
55
+
56
+ describe User do
57
+ describe "profile_picture's metadatas" do
58
+ it 'parse metadatas with format_parser' do
59
+ user = User.create
60
+ user.profile_picture.attach(
61
+ filename: 'cat.png',
62
+ io: URI.open('https://freesvg.org/img/1416155153.png')
63
+ )
64
+
65
+ user.profile_picture.analyze
66
+
67
+ _(user.profile_picture.metadata[:width_px]).must_equal 500
68
+ _(user.profile_picture.metadata[:height_px]).must_equal 296
69
+ _(user.profile_picture.metadata[:color_mode]).must_equal 'rgba'
70
+ end
71
+ end
72
+ end
@@ -37,6 +37,14 @@ describe FormatParser::MP3Parser do
37
37
  expect(parsed.album).to be_nil
38
38
  end
39
39
  end
40
+
41
+ context 'when has an empty tag' do
42
+ let(:fpath) { fixtures_dir + '/MP3/id3v2_with_empty_tag.mp3' }
43
+
44
+ it 'ignores the empty tags' do
45
+ expect(parsed.intrinsics[:genre]).to eq('Rock')
46
+ end
47
+ end
40
48
  end
41
49
 
42
50
  it 'decodes and estimates duration for a CBR MP3' do
@@ -71,6 +79,17 @@ describe FormatParser::MP3Parser do
71
79
  expect(prepped.pos).to eq(3145738)
72
80
  end
73
81
 
82
+ it 'does not raise error when a tag frame has unsupported encoding' do
83
+ fpath = fixtures_dir + '/MP3/id3v2_frame_with_invalid_encoding.mp3'
84
+
85
+ parsed = subject.call(File.open(fpath, 'rb'))
86
+
87
+ expect(parsed.nature). to eq(:audio)
88
+ expect(parsed.album).to eq('wetransfer')
89
+ expect(parsed.artist).to eq('wetransfer')
90
+ expect(parsed.title).to eq('test')
91
+ end
92
+
74
93
  it 'parses the Cassy MP3' do
75
94
  fpath = fixtures_dir + '/MP3/Cassy.mp3'
76
95
  parsed = subject.call(File.open(fpath, 'rb'))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.22.1
4
+ version: 0.23.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2020-08-26 00:00:00.000000000 Z
12
+ date: 2020-09-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -51,20 +51,14 @@ dependencies:
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0.10'
55
- - - ">="
56
- - !ruby/object:Gem::Version
57
- version: 0.10.1
54
+ version: '0.13'
58
55
  type: :runtime
59
56
  prerelease: false
60
57
  version_requirements: !ruby/object:Gem::Requirement
61
58
  requirements:
62
59
  - - "~>"
63
60
  - !ruby/object:Gem::Version
64
- version: '0.10'
65
- - - ">="
66
- - !ruby/object:Gem::Version
67
- version: 0.10.1
61
+ version: '0.13'
68
62
  - !ruby/object:Gem::Dependency
69
63
  name: faraday
70
64
  requirement: !ruby/object:Gem::Requirement
@@ -202,6 +196,8 @@ files:
202
196
  - Rakefile
203
197
  - exe/format_parser_inspect
204
198
  - format_parser.gemspec
199
+ - lib/active_storage/blob_analyzer.rb
200
+ - lib/active_storage/blob_io.rb
205
201
  - lib/archive.rb
206
202
  - lib/attributes_json.rb
207
203
  - lib/audio.rb
@@ -241,6 +237,8 @@ files:
241
237
  - lib/read_limits_config.rb
242
238
  - lib/remote_io.rb
243
239
  - lib/video.rb
240
+ - spec/active_storage/blob_io_spec.rb
241
+ - spec/active_storage/rails_app_spec.rb
244
242
  - spec/attributes_json_spec.rb
245
243
  - spec/care_spec.rb
246
244
  - spec/esoteric_formats_spec.rb
@@ -248,6 +246,7 @@ files:
248
246
  - spec/format_parser_inspect_spec.rb
249
247
  - spec/format_parser_spec.rb
250
248
  - spec/hash_utils_spec.rb
249
+ - spec/integration/active_storage/rails_app.rb
251
250
  - spec/io_utils_spec.rb
252
251
  - spec/parsers/aiff_parser_spec.rb
253
252
  - spec/parsers/bmp_parser_spec.rb