format_parser 0.21.0 → 0.23.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ad2245de4a2119c7572c3962ad14abbf16395b2bec4064b218ee9f99d1e7c24b
4
- data.tar.gz: b982bcc7f6626b66684db532317b0c0d35cd062aa89766ea1a230f93e7d996d6
3
+ metadata.gz: f76db376646606abe6a7ccaa6f0a14efdc997ecd7fe29aff0ab3d8172857649f
4
+ data.tar.gz: 07c142e7ce6aaa518285d425eb95961d18e053a6b15456d4cb569cdc70a79069
5
5
  SHA512:
6
- metadata.gz: 2df2a3763e12e2bb0c70a8f5ec3319fcf6a3210a73461dc8abb5ec2af706028403eca48cf8c589bd40122dff0cfdabce383a79b1253237e219a5c89936ec0a5e
7
- data.tar.gz: 0ca6084649313b2c7ad32204b4c8b745f13dbf6cf2c347cb7c80ca7976b964f00feabd9666c0372e9a6c10e4ed250500ed4e202cdb687372cbd369378b7a0faa
6
+ metadata.gz: 3e92625dbe822d3423a084174fd7bc9d4a296effe26182a91743b36995f8470edc4fa56a3a0a10ce095d4998da5dee9620b983c1f980e0418a3b4574244e848c
7
+ data.tar.gz: b89df24f7b6638b3cd42b284a1792475a094e947bd995e5ccdd0c554014075f5d167a7c04fbc08c681483b0c82aa2e59ae4e4af62f99d6b158fb4b3f1096d80b
data/.gitignore CHANGED
@@ -58,3 +58,6 @@ Gemfile.lock
58
58
 
59
59
  # OSX Files
60
60
  .DS_Store
61
+
62
+ # rspec examples
63
+ spec/examples.txt
@@ -1,3 +1,24 @@
1
+ ## 0.23.1
2
+ * Updated gem exifr to fix problems related to JPEG files from Olympus microscopes, which often have bad thumbnail data
3
+
4
+ ## 0.23.0
5
+ * Add ActiveStorage analyzer which can analyze ActiveStorage blobs. Enable it by setting
6
+ `config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer`
7
+ * Ignore empty ID3 tags and do not allow them to overwrite others
8
+ * Update the id3tag dependency so that we can fall back to UTF-8 instead of raising an error when parsing
9
+ MP3 files
10
+
11
+ ## 0.22.1
12
+ * Fix Zip parser to not raise error for invalid zip files, with an invalid central directory
13
+
14
+ ## 0.22.0
15
+ * Adds option `stringify_keys: true` to #as_json methods (fix #151)
16
+
17
+ ## 0.21.1
18
+ * MPEG: Ensure parsing does not inadvertently return an Integer instead of Result|nil
19
+ * MPEG: Scan further into the MPEG file than previously (scan 32 1KB chunks)
20
+ * MPEG: Ensure the parser does not raise an exception when there is no data to read for scanning beyond the initial header
21
+
1
22
  ## 0.21.0
2
23
  * Adds support for MPEG video files
3
24
 
@@ -234,4 +234,9 @@ This provision also applies to the test files you include with the changed code
234
234
 
235
235
  ## Changelog
236
236
 
237
- When creating a new release you must add an entry in the `CHANGELOG.md`.
237
+ When creating a new release you must add an entry in the `CHANGELOG.md`.
238
+
239
+ ## Testing locally
240
+
241
+ It's possible to run `exe/format_parser_inspect FILE_NAME` or `exe/format_parser_inspect FILE_URI`
242
+ to test the new code without the necessity of installing the gem.
data/README.md CHANGED
@@ -75,6 +75,17 @@ img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
75
75
  JSON.pretty_generate(img_info) #=> ...
76
76
  ```
77
77
 
78
+ To convert the result to a Hash or a structure suitable for JSON serialization
79
+
80
+ ```ruby
81
+ img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
82
+ img_info.as_json
83
+
84
+ # it's also possible to convert all keys to string
85
+ img_info.as_json(stringify_keys: true)
86
+ ```
87
+
88
+
78
89
  ## Creating your own parsers
79
90
 
80
91
  See the [section on writing parsers in CONTRIBUTING.md](CONTRIBUTING.md#so-you-want-to-contribute-a-new-parser)
@@ -188,7 +199,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
188
199
 
189
200
  ## Copyright
190
201
 
191
- Copyright (c) 2019 WeTransfer.
202
+ Copyright (c) 2020 WeTransfer.
192
203
 
193
204
  `format_parser` is distributed under the conditions of the [Hippocratic License](https://firstdonoharm.dev/version/1/2/license.html)
194
205
  - See LICENSE.txt for further details.
@@ -31,15 +31,14 @@ Gem::Specification.new do |spec|
31
31
  spec.require_paths = ['lib']
32
32
 
33
33
  spec.add_dependency 'ks', '~> 0.0'
34
- spec.add_dependency 'exifr', '~> 1', '>= 1.3.4'
35
- spec.add_dependency 'id3tag', '~> 0.10', '>= 0.10.1'
34
+ spec.add_dependency 'exifr', '~> 1', '>= 1.3.7'
35
+ spec.add_dependency 'id3tag', '~> 0.13'
36
36
  spec.add_dependency 'faraday', '~> 0.13'
37
37
  spec.add_dependency 'measurometer', '~> 1'
38
38
 
39
39
  spec.add_development_dependency 'rspec', '~> 3.0'
40
40
  spec.add_development_dependency 'rake', '~> 12'
41
41
  spec.add_development_dependency 'simplecov', '~> 0.15'
42
- spec.add_development_dependency 'pry', '~> 0.11'
43
42
  spec.add_development_dependency 'yard', '~> 0.9'
44
43
  spec.add_development_dependency 'wetransfer_style', '0.5.0'
45
44
  spec.add_development_dependency 'parallel_tests'
@@ -0,0 +1,35 @@
1
+ require_relative 'blob_io'
2
+
3
+ # An analyzer class that can be hooked to ActiveStorage, in order to enable
4
+ # FormatParser to do the blob analysis instead of ActiveStorage's built-in analyzers.
5
+ # Invoked if properly integrated in Rails initializer.
6
+
7
+ module FormatParser
8
+ module ActiveStorage
9
+ class BlobAnalyzer
10
+ # FormatParser is able to handle a lot of formats, so by default it accepts all files
11
+ #
12
+ # @return [Boolean, true] always return true
13
+ def self.accept?(_blob)
14
+ true
15
+ end
16
+
17
+ def initialize(blob)
18
+ @blob = blob
19
+ end
20
+
21
+ # @return [Hash] file metadatas
22
+ def metadata
23
+ io = BlobIO.new(@blob)
24
+ parsed_file = FormatParser.parse(io)
25
+
26
+ if parsed_file
27
+ # We symbolize keys because of existing output hash format of ImageAnalyzer
28
+ parsed_file.as_json.symbolize_keys
29
+ else
30
+ logger.info "Skipping file analysis because FormatParser doesn't support the file"
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,51 @@
1
+ # Acts as a proxy to turn ActiveStorage file into IO object
2
+
3
+ module FormatParser
4
+ module ActiveStorage
5
+ class BlobIO
6
+ # @param blob[ActiveStorage::Blob] the file with linked service
7
+ # @return [BlobIO]
8
+ def initialize(blob)
9
+ @blob = blob
10
+ @service = blob.service
11
+ @pos = 0
12
+ end
13
+
14
+ # Emulates IO#read, but requires the number of bytes to read.
15
+ # Rely on `ActiveStorage::Service.download_chunk` of each hosting type (local, S3, Azure, etc)
16
+ #
17
+ # @param n_bytes[Integer] how many bytes to read
18
+ # @return [String] the read bytes
19
+ def read(n_bytes)
20
+ # HTTP byte ranges are inclusive at both ends, hence the `- 1` on the upper bound.
21
+ http_range = (@pos..(@pos + n_bytes - 1))
22
+ body = @service.download_chunk(@blob.key, http_range)
23
+ @pos += body.bytesize
24
+ body.force_encoding(Encoding::ASCII_8BIT)
25
+ end
26
+
27
+ # Emulates IO#seek
28
+ #
29
+ # @param [Integer] offset size
30
+ # @return [Integer] always return 0, `seek` only mutates `pos` attribute
31
+ def seek(offset)
32
+ @pos = offset
33
+ 0
34
+ end
35
+
36
+ # Emulates IO#size.
37
+ #
38
+ # @return [Integer] the byte size of the blob, as reported by ActiveStorage
39
+ def size
40
+ @blob.byte_size
41
+ end
42
+
43
+ # Emulates IO#pos
44
+ #
45
+ # @return [Integer] the current offset (in bytes) of the io
46
+ def pos
47
+ @pos
48
+ end
49
+ end
50
+ end
51
+ end
@@ -15,7 +15,12 @@ module FormatParser::AttributesJSON
15
15
 
16
16
  # Implements a sane default `as_json` for an object
17
17
  # that accessors defined
18
- def as_json(root: false)
18
+ #
19
+ # @param root[Bool] if true, it surrounds the result in a hash with a key
20
+ # `format_parser_file_info`
21
+ # @param stringify_keys[Bool] if true, it transforms all the hash keys to a string.
22
+ # The default value is false for backward compatibility
23
+ def as_json(root: false, stringify_keys: false, **)
19
24
  h = {}
20
25
  h['nature'] = nature if respond_to?(:nature) # Needed for file info structs
21
26
  methods.grep(/\w\=$/).each_with_object(h) do |attr_writer_method_name, h|
@@ -27,6 +32,9 @@ module FormatParser::AttributesJSON
27
32
  sanitized_value = _sanitize_json_value(unwrapped_attribute_value)
28
33
  h[reader_method_name] = sanitized_value
29
34
  end
35
+
36
+ h = FormatParser::HashUtils.deep_transform_keys(h, &:to_s) if stringify_keys
37
+
30
38
  if root
31
39
  {'format_parser_file_info' => h}
32
40
  else
@@ -5,6 +5,7 @@ require 'measurometer'
5
5
  # top-level methods of the library.
6
6
  module FormatParser
7
7
  require_relative 'format_parser/version'
8
+ require_relative 'hash_utils'
8
9
  require_relative 'attributes_json'
9
10
  require_relative 'image'
10
11
  require_relative 'audio'
@@ -17,6 +18,7 @@ module FormatParser
17
18
  require_relative 'remote_io'
18
19
  require_relative 'io_constraint'
19
20
  require_relative 'care'
21
+ require_relative 'active_storage/blob_analyzer'
20
22
 
21
23
  # Define Measurometer in the internal namespace as well
22
24
  # so that we stay compatible for the applications that use it
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.21.0'
2
+ VERSION = '0.23.1'
3
3
  end
@@ -0,0 +1,19 @@
1
+ # based on https://github.com/rails/rails/blob/master/activesupport/lib/active_support/core_ext/hash/keys.rb#L116
2
+ # I chose to copy this method instead of adding activesupport as a dependency
3
+ # because we want to have the least number of dependencies
4
+ module FormatParser
5
+ class HashUtils
6
+ def self.deep_transform_keys(object, &block)
7
+ case object
8
+ when Hash
9
+ object.each_with_object({}) do |(key, value), result|
10
+ result[yield(key)] = deep_transform_keys(value, &block)
11
+ end
12
+ when Array
13
+ object.map { |e| deep_transform_keys(e, &block) }
14
+ else
15
+ object
16
+ end
17
+ end
18
+ end
19
+ end
@@ -44,13 +44,9 @@ class FormatParser::MP3Parser
44
44
  tag = __getobj__
45
45
  MEMBERS.each_with_object({}) do |k, h|
46
46
  value = tag.public_send(k)
47
- h[k] = value if value
47
+ h[k] = value if value && !value.empty?
48
48
  end
49
49
  end
50
-
51
- def as_json(*)
52
- to_h
53
- end
54
50
  end
55
51
 
56
52
  def likely_match?(filename)
@@ -85,7 +81,7 @@ class FormatParser::MP3Parser
85
81
 
86
82
  first_frame = initial_frames.first
87
83
 
88
- id3tags_hash = blend_id3_tags_into_hash(*tags)
84
+ id3tags_hash = with_id3tag_local_configs { blend_id3_tags_into_hash(*tags) }
89
85
 
90
86
  file_info = FormatParser::Audio.new(
91
87
  format: :mp3,
@@ -297,5 +293,14 @@ class FormatParser::MP3Parser
297
293
  attrs
298
294
  end
299
295
 
296
+ def with_id3tag_local_configs
297
+ ID3Tag.local_configuration do |c|
298
+ c.string_encode_options = { invalid: :replace, undef: :replace }
299
+ c.source_encoding_fallback = Encoding::UTF_8
300
+
301
+ yield
302
+ end
303
+ end
304
+
300
305
  FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 99
301
306
  end
@@ -26,9 +26,8 @@ class FormatParser::MPEGParser
26
26
 
27
27
  PACK_HEADER_START_CODE = [0x00, 0x00, 0x01, 0xBA].pack('C*')
28
28
  SEQUENCE_HEADER_START_CODE = [0xB3].pack('C*')
29
- SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT = 4
30
- SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT = 4
31
- BYTES_TO_READ_PER_TIME = 1024
29
+ MAX_BLOCK_READS = 32
30
+ BYTES_TO_READ_PER_READ = 1024
32
31
 
33
32
  def self.likely_match?(filename)
34
33
  filename =~ /\.(mpg|mpeg)$/i
@@ -37,18 +36,19 @@ class FormatParser::MPEGParser
37
36
  def self.call(io)
38
37
  return unless matches_mpeg_header?(io)
39
38
 
40
- # We are looping though the stream because there can be several sequence headers and some of them are not usefull.
41
- # If we detect that the header is not usefull, then we look for the next one for SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT
39
+ # We are looping through the stream because there can be several sequence headers and some of them are not useful.
40
+ # If we detect that the header is not useful, then we look for the next one, for at most MAX_BLOCK_READS iterations.
42
41
  # If we reach the EOF, then the mpg is likely to be corrupted and we return nil
43
- SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT.times do
44
- return if fetch_next_sequence_header_code_position(io).nil?
42
+ MAX_BLOCK_READS.times do
43
+ next unless pos = find_next_header_code_pos(io)
44
+ io.seek(pos + 1)
45
45
  horizontal_size, vertical_size = parse_image_size(io)
46
46
  ratio_code, rate_code = parse_rate_information(io)
47
-
48
47
  if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
49
48
  return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
50
49
  end
51
50
  end
51
+ nil # otherwise the return value of Integer#times will be returned
52
52
  rescue FormatParser::IOUtils::InvalidRead
53
53
  nil
54
54
  end
@@ -90,15 +90,11 @@ class FormatParser::MPEGParser
90
90
  # Returns the position of the next sequence package content in the stream
91
91
  # This method will read BYTES_TO_READ_PER_TIME in each loop for a maximum amount of SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT times
92
92
  # If the package is not found, then it returns nil.
93
- def self.fetch_next_sequence_header_code_position(io)
94
- SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT.times do
95
- bytes_stream_read = io.read(BYTES_TO_READ_PER_TIME)
96
- header_relative_index = bytes_stream_read.index(SEQUENCE_HEADER_START_CODE)
97
- next if header_relative_index.nil?
98
- new_io_pos = io.pos - BYTES_TO_READ_PER_TIME + header_relative_index + 1
99
- io.seek(new_io_pos)
100
- return new_io_pos
101
- end
93
+ def self.find_next_header_code_pos(io)
94
+ pos_before_read = io.pos
95
+ bin_str = io.read(BYTES_TO_READ_PER_READ) # bin_str might be nil if we are at EOF
96
+ header_relative_index = bin_str && bin_str.index(SEQUENCE_HEADER_START_CODE)
97
+ return pos_before_read + header_relative_index if header_relative_index
102
98
  end
103
99
 
104
100
  # If the first 4 bytes of the stream are equal to 00 00 01 BA, the pack start code for the Pack Header, then it's an MPEG file.
@@ -18,6 +18,7 @@ class FormatParser::ZIPParser::FileReader
18
18
  'Could not find the EOCD signature in the buffer - maybe a malformed ZIP file'
19
19
  end
20
20
  end
21
+ InvalidCentralDirectory = Class.new(Error)
21
22
 
22
23
  C_UINT32LE = 'V'
23
24
  C_UINT16LE = 'v'
@@ -175,6 +176,8 @@ class FormatParser::ZIPParser::FileReader
175
176
  # BUT! in format_parser we avoid unbounded reads, as a matter of fact they are forbidden.
176
177
  # So we will again limit ouselves to cdir_size, and we will take cushion of 1 KB.
177
178
  central_directory_str = io.read(cdir_size + 1024)
179
+ raise InvalidCentralDirectory if central_directory_str.nil?
180
+
178
181
  central_directory_io = StringIO.new(central_directory_str)
179
182
  log do
180
183
  format(
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::ActiveStorage::BlobIO do
4
+ let(:blob_service) { double }
5
+ let(:blob) { double(key: 'blob_key', service: blob_service, byte_size: 43000) }
6
+ let(:io) { described_class.new(blob) }
7
+ let(:fixture_path) { fixtures_dir + '/test.png' }
8
+
9
+ it_behaves_like 'an IO object compatible with IOConstraint'
10
+
11
+ describe '#read' do
12
+ it 'reads io using download_chunk from ActiveStorage#Service' do
13
+ allow(blob_service).to receive(:download_chunk) { 'a' }
14
+
15
+ expect(io.read(1)).to eq('a')
16
+ end
17
+
18
+ it 'updates #pos on read' do
19
+ allow(blob_service).to receive(:download_chunk) { 'a' }
20
+
21
+ expect { io.read(1) }.to change { io.pos }.from(0).to(1)
22
+ end
23
+ end
24
+
25
+ describe '#seek' do
26
+ it 'updates @pos' do
27
+ expect { io.seek(10) }.to change { io.pos }.from(0).to(10)
28
+ end
29
+ end
30
+
31
+ describe '#size' do
32
+ it 'returns the size of the blob byte_size' do
33
+ expect(io.size).to eq(blob.byte_size)
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,58 @@
1
+ require 'spec_helper'
2
+
3
+ def skip_reason
4
+ if RUBY_ENGINE == 'jruby'
5
+ 'Skipping because JRuby have randon failing issue'
6
+ elsif RUBY_VERSION.to_f < 2.5
7
+ 'Skipping because Rails testing script use Rails 6, who does not support Ruby bellow 2.5'
8
+ else
9
+ false
10
+ end
11
+ end
12
+
13
+ describe 'Rails app with ActiveStorage and format-parser', skip: skip_reason do
14
+ describe 'local hosting with ActiveStorage disk adapter' do
15
+ it 'parse local file with format_parser' do
16
+ clean_env do
17
+ cmd = 'ruby spec/integration/active_storage/rails_app.rb'
18
+ cmd_status = ruby_script_runner(cmd)
19
+ expect(cmd_status[:stdout].last).to match(/1 runs, 3 assertions, 0 failures, 0 errors, 0 skips/)
20
+ expect(cmd_status[:exitstatus]).to eq(0)
21
+ end
22
+ end
23
+ end
24
+
25
+ def ruby_script_runner(cmd)
26
+ require 'open3'
27
+ cmd_status = { stdout: [], exitstatus: nil }
28
+ Open3.popen2(cmd) do |_stdin, stdout, wait_thr|
29
+ frame_stdout do
30
+ while line = stdout.gets
31
+ puts "| #{line}"
32
+ cmd_status[:stdout] << line
33
+ end
34
+ end
35
+ cmd_status[:exitstatus] = wait_thr.value.exitstatus
36
+ end
37
+ cmd_status
38
+ end
39
+
40
+ def frame_stdout
41
+ puts
42
+ puts '-' * 50
43
+ yield
44
+ puts '-' * 50
45
+ end
46
+
47
+ def clean_env
48
+ if Bundler.respond_to?(:with_unbundled_env)
49
+ Bundler.with_unbundled_env do
50
+ yield
51
+ end
52
+ else
53
+ Bundler.with_clean_env do
54
+ yield
55
+ end
56
+ end
57
+ end
58
+ end
@@ -140,4 +140,30 @@ describe FormatParser::AttributesJSON do
140
140
  JSON.pretty_generate(object_with_attributes_module)
141
141
  }.to raise_error(/structure too deep/)
142
142
  end
143
+
144
+ it 'converts all hash keys to string when stringify_keys: true' do
145
+ fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
146
+ fi_io = File.open(fixture_path, 'rb')
147
+
148
+ result = FormatParser::ZIPParser.new.call(fi_io).as_json(stringify_keys: true)
149
+
150
+ result['entries'].each do |entry|
151
+ entry.each do |key, _value|
152
+ expect(key).to be_a(String)
153
+ end
154
+ end
155
+ end
156
+
157
+ it 'does not convert hash keys to string when stringify_keys: false' do
158
+ fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
159
+ fi_io = File.open(fixture_path, 'rb')
160
+
161
+ result = FormatParser::ZIPParser.new.call(fi_io).as_json
162
+
163
+ result['entries'].each do |entry|
164
+ entry.each do |key, _value|
165
+ expect(key).to be_a(Symbol)
166
+ end
167
+ end
168
+ end
143
169
  end
@@ -0,0 +1,42 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::HashUtils do
4
+ describe '.deep_transform_keys' do
5
+ it 'transforms all the keys in a hash' do
6
+ hash = { aa: 1, 'bb' => 2 }
7
+ result = described_class.deep_transform_keys(hash, &:to_s)
8
+
9
+ expect(result).to eq('aa' => 1, 'bb' => 2)
10
+ end
11
+
12
+ it 'transforms all the keys in a array of hashes' do
13
+ array = [{ aa: 1, bb: 2 }, { cc: 3, dd: [{c: 2, d: 3}] }]
14
+ result = described_class.deep_transform_keys(array, &:to_s)
15
+
16
+ expect(result).to eq(
17
+ [{'aa' => 1, 'bb' => 2}, {'cc' => 3, 'dd' => [{'c' => 2, 'd' => 3}]}]
18
+ )
19
+ end
20
+
21
+ it 'transforms all the keys in a hash recursively' do
22
+ hash = { aa: 1, bb: { cc: 22, dd: 3 } }
23
+ result = described_class.deep_transform_keys(hash, &:to_s)
24
+
25
+ expect(result).to eq('aa' => 1, 'bb' => { 'cc' => 22, 'dd' => 3})
26
+ end
27
+
28
+ it 'does nothing for an non array/hash object' do
29
+ object = Object.new
30
+ result = described_class.deep_transform_keys(object, &:to_s)
31
+
32
+ expect(result).to eq(object)
33
+ end
34
+
35
+ it 'returns the last value if different keys are transformed into the same one' do
36
+ hash = { aa: 0, 'bb' => 2, bb: 1 }
37
+ result = described_class.deep_transform_keys(hash, &:to_s)
38
+
39
+ expect(result).to eq('aa' => 0, 'bb' => 1)
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,72 @@
1
+ require 'bundler/inline'
2
+
3
+ gemfile(true) do
4
+ source 'https://rubygems.org'
5
+
6
+ git_source(:github) { |repo| "https://github.com/#{repo}.git" }
7
+
8
+ gem 'rails', '6.0.3'
9
+ gem 'sqlite3'
10
+ gem 'format_parser', path: './'
11
+ end
12
+
13
+ require 'active_record/railtie'
14
+ require 'active_storage/engine'
15
+ require 'tmpdir'
16
+
17
+ class TestApp < Rails::Application
18
+ config.root = __dir__
19
+ config.hosts << 'example.org'
20
+ config.eager_load = false
21
+ config.session_store :cookie_store, key: 'cookie_store_key'
22
+ secrets.secret_key_base = 'secret_key_base'
23
+
24
+ config.logger = Logger.new('/dev/null')
25
+
26
+ config.active_storage.service = :local
27
+ config.active_storage.service_configurations = {
28
+ local: {
29
+ root: Dir.tmpdir,
30
+ service: 'Disk'
31
+ }
32
+ }
33
+
34
+ config.active_storage.analyzers.prepend FormatParser::ActiveStorage::BlobAnalyzer
35
+ end
36
+
37
+ ENV['DATABASE_URL'] = 'sqlite3::memory:'
38
+
39
+ Rails.application.initialize!
40
+
41
+ require ActiveStorage::Engine.root.join('db/migrate/20170806125915_create_active_storage_tables.rb').to_s
42
+
43
+ ActiveRecord::Schema.define do
44
+ CreateActiveStorageTables.new.change
45
+
46
+ create_table :users, force: true
47
+ end
48
+
49
+ class User < ActiveRecord::Base
50
+ has_one_attached :profile_picture
51
+ end
52
+
53
+ require 'minitest/autorun'
54
+ require 'open-uri'
55
+
56
+ describe User do
57
+ describe "profile_picture's metadatas" do
58
+ it 'parse metadatas with format_parser' do
59
+ user = User.create
60
+ user.profile_picture.attach(
61
+ filename: 'cat.png',
62
+ io: URI.open('https://freesvg.org/img/1416155153.png')
63
+ )
64
+
65
+ user.profile_picture.analyze
66
+
67
+ _(user.profile_picture.metadata[:width_px]).must_equal 500
68
+ _(user.profile_picture.metadata[:height_px]).must_equal 296
69
+ _(user.profile_picture.metadata[:color_mode]).must_equal 'rgba'
70
+ end
71
+ end
72
+ end
@@ -37,6 +37,14 @@ describe FormatParser::MP3Parser do
37
37
  expect(parsed.album).to be_nil
38
38
  end
39
39
  end
40
+
41
+ context 'when has an empty tag' do
42
+ let(:fpath) { fixtures_dir + '/MP3/id3v2_with_empty_tag.mp3' }
43
+
44
+ it 'ignores the empty tags' do
45
+ expect(parsed.intrinsics[:genre]).to eq('Rock')
46
+ end
47
+ end
40
48
  end
41
49
 
42
50
  it 'decodes and estimates duration for a CBR MP3' do
@@ -71,6 +79,17 @@ describe FormatParser::MP3Parser do
71
79
  expect(prepped.pos).to eq(3145738)
72
80
  end
73
81
 
82
+ it 'does not raise error when a tag frame has unsupported encoding' do
83
+ fpath = fixtures_dir + '/MP3/id3v2_frame_with_invalid_encoding.mp3'
84
+
85
+ parsed = subject.call(File.open(fpath, 'rb'))
86
+
87
+ expect(parsed.nature). to eq(:audio)
88
+ expect(parsed.album).to eq('wetransfer')
89
+ expect(parsed.artist).to eq('wetransfer')
90
+ expect(parsed.title).to eq('test')
91
+ end
92
+
74
93
  it 'parses the Cassy MP3' do
75
94
  fpath = fixtures_dir + '/MP3/Cassy.mp3'
76
95
  parsed = subject.call(File.open(fpath, 'rb'))
@@ -110,4 +129,32 @@ describe FormatParser::MP3Parser do
110
129
  subject.call(StringIO.new(''))
111
130
  }.to raise_error(FormatParser::IOUtils::InvalidRead)
112
131
  end
132
+
133
+ describe '#as_json' do
134
+ it 'converts all hash keys to string when stringify_keys: true' do
135
+ fpath = fixtures_dir + '/MP3/Cassy.mp3'
136
+ result = subject.call(File.open(fpath, 'rb')).as_json(stringify_keys: true)
137
+
138
+ expect(
139
+ result['intrinsics'].keys.map(&:class).uniq
140
+ ).to eq([String])
141
+
142
+ expect(
143
+ result['intrinsics']['id3tags'].map(&:class).uniq
144
+ ).to eq([ID3Tag::Tag])
145
+ end
146
+
147
+ it 'does not convert the hash keys to string when stringify_keys: false' do
148
+ fpath = fixtures_dir + '/MP3/Cassy.mp3'
149
+ result = subject.call(File.open(fpath, 'rb')).as_json
150
+
151
+ expect(
152
+ result['intrinsics'].keys.map(&:class).uniq
153
+ ).to eq([Symbol])
154
+
155
+ expect(
156
+ result['intrinsics'][:id3tags].map(&:class).uniq
157
+ ).to eq([ID3Tag::Tag])
158
+ end
159
+ end
113
160
  end
@@ -12,6 +12,27 @@ describe FormatParser::MPEGParser do
12
12
  expect(parse_result.intrinsics[:frame_rate]).to eq('30')
13
13
  end
14
14
 
15
+ it 'returns a nil if it is necessary to iterate over a very large number of bytes and the requisite sequences are not detected' do
16
+ bytes_buffer = StringIO.new
17
+ bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
18
+ zero_bytes = [0x00].pack('C') * (1024 * 1024 * 5)
19
+ bytes_buffer.write(zero_bytes)
20
+
21
+ bytes_buffer.rewind
22
+
23
+ parse_result = described_class.call(bytes_buffer)
24
+ expect(parse_result).to be_nil
25
+ end
26
+
27
+ it 'returns a nil if the IO only contains the MPEG header bytes at the start and nothing else' do
28
+ bytes_buffer = StringIO.new
29
+ bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
30
+ bytes_buffer.rewind
31
+
32
+ parse_result = described_class.call(bytes_buffer)
33
+ expect(parse_result).to be_nil
34
+ end
35
+
15
36
  it 'parses a file with mpeg extension' do
16
37
  parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video2.mpeg', 'rb'))
17
38
 
@@ -103,4 +103,11 @@ describe FormatParser::ZIPParser do
103
103
  expect(first_entry.filename).to eq('Li��nia Extreme//')
104
104
  expect(first_entry.type).to eq(:directory)
105
105
  end
106
+
107
+ it 'is able to handle files with invalid central directory position' do
108
+ invalid_zip_path = fixtures_dir + '/ZIP/invalid_central_directory.zip'
109
+
110
+ expect { subject.call(File.open(invalid_zip_path, 'rb')) }
111
+ .to_not raise_error
112
+ end
106
113
  end
@@ -8,7 +8,6 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
8
8
 
9
9
  require 'rspec'
10
10
  require 'format_parser'
11
- require 'pry'
12
11
 
13
12
  module SpecHelpers
14
13
  def fixtures_dir
@@ -19,6 +18,8 @@ end
19
18
  RSpec.configure do |c|
20
19
  c.include SpecHelpers
21
20
  c.extend SpecHelpers # makes fixtures_dir available for example groups too
21
+ # https://relishapp.com/rspec/rspec-core/docs/command-line/only-failures
22
+ c.example_status_persistence_file_path = 'spec/examples.txt'
22
23
  end
23
24
 
24
25
  RSpec.shared_examples 'an IO object compatible with IOConstraint' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.21.0
4
+ version: 0.23.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2020-03-27 00:00:00.000000000 Z
12
+ date: 2020-09-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -34,7 +34,7 @@ dependencies:
34
34
  version: '1'
35
35
  - - ">="
36
36
  - !ruby/object:Gem::Version
37
- version: 1.3.4
37
+ version: 1.3.7
38
38
  type: :runtime
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -44,27 +44,21 @@ dependencies:
44
44
  version: '1'
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: 1.3.4
47
+ version: 1.3.7
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: id3tag
50
50
  requirement: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0.10'
55
- - - ">="
56
- - !ruby/object:Gem::Version
57
- version: 0.10.1
54
+ version: '0.13'
58
55
  type: :runtime
59
56
  prerelease: false
60
57
  version_requirements: !ruby/object:Gem::Requirement
61
58
  requirements:
62
59
  - - "~>"
63
60
  - !ruby/object:Gem::Version
64
- version: '0.10'
65
- - - ">="
66
- - !ruby/object:Gem::Version
67
- version: 0.10.1
61
+ version: '0.13'
68
62
  - !ruby/object:Gem::Dependency
69
63
  name: faraday
70
64
  requirement: !ruby/object:Gem::Requirement
@@ -135,20 +129,6 @@ dependencies:
135
129
  - - "~>"
136
130
  - !ruby/object:Gem::Version
137
131
  version: '0.15'
138
- - !ruby/object:Gem::Dependency
139
- name: pry
140
- requirement: !ruby/object:Gem::Requirement
141
- requirements:
142
- - - "~>"
143
- - !ruby/object:Gem::Version
144
- version: '0.11'
145
- type: :development
146
- prerelease: false
147
- version_requirements: !ruby/object:Gem::Requirement
148
- requirements:
149
- - - "~>"
150
- - !ruby/object:Gem::Version
151
- version: '0.11'
152
132
  - !ruby/object:Gem::Dependency
153
133
  name: yard
154
134
  requirement: !ruby/object:Gem::Requirement
@@ -216,6 +196,8 @@ files:
216
196
  - Rakefile
217
197
  - exe/format_parser_inspect
218
198
  - format_parser.gemspec
199
+ - lib/active_storage/blob_analyzer.rb
200
+ - lib/active_storage/blob_io.rb
219
201
  - lib/archive.rb
220
202
  - lib/attributes_json.rb
221
203
  - lib/audio.rb
@@ -223,6 +205,7 @@ files:
223
205
  - lib/document.rb
224
206
  - lib/format_parser.rb
225
207
  - lib/format_parser/version.rb
208
+ - lib/hash_utils.rb
226
209
  - lib/image.rb
227
210
  - lib/io_constraint.rb
228
211
  - lib/io_utils.rb
@@ -254,12 +237,16 @@ files:
254
237
  - lib/read_limits_config.rb
255
238
  - lib/remote_io.rb
256
239
  - lib/video.rb
240
+ - spec/active_storage/blob_io_spec.rb
241
+ - spec/active_storage/rails_app_spec.rb
257
242
  - spec/attributes_json_spec.rb
258
243
  - spec/care_spec.rb
259
244
  - spec/esoteric_formats_spec.rb
260
245
  - spec/file_information_spec.rb
261
246
  - spec/format_parser_inspect_spec.rb
262
247
  - spec/format_parser_spec.rb
248
+ - spec/hash_utils_spec.rb
249
+ - spec/integration/active_storage/rails_app.rb
263
250
  - spec/io_utils_spec.rb
264
251
  - spec/parsers/aiff_parser_spec.rb
265
252
  - spec/parsers/bmp_parser_spec.rb