format_parser 0.20.0 → 0.22.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e97554eb9bf6ca4a4ea76daa0f226c370f707e270931759375e9f6b72c1e7f40
4
- data.tar.gz: 946767ce39eb4a96420fd52852f514333baa33ec20d256241acc7208863aaf87
3
+ metadata.gz: 05470fef0edec68c427b9680cf94a88a8f3286e2279b2daf13e87679324eb061
4
+ data.tar.gz: f65ed1d390e4e70bd34a3b01b5d54d39e4076d18ebf15fc20196c2746c86122f
5
5
  SHA512:
6
- metadata.gz: 612b8b1af86329fce62e3eb035b30178485679a76516fcfe8a72c3785ef782855098609e7189559df2fd5929663d5b7ec3b7319e5e44a7048229aa13b5fc5dc9
7
- data.tar.gz: 8ffa31f8c498d72eb734b15d9b5de99c44742e1651a200b5484da8e7d5bc5bba5d52092a9b8b52d7374646265be2ecd0f0b4f06bc4798b4da9a16f83d3749512
6
+ metadata.gz: e6e9e2d1fbfe6e813d1e0d9d2c102ce3f031fd37395cc75807ca7b527b1a2fc4a9a3c41baaf9e2cdaa601063cded92ef446ce913f2210f3d82e063126873d54d
7
+ data.tar.gz: 86e166b2ac754fe5d7e42e471bcaa70cd54ae0aa22da3e0f85a6223213c2bd5a284f2b53416d03656dc291898837636678dbfba2e32b179a562ea7c8b05ef0bf
data/.gitignore CHANGED
@@ -54,3 +54,10 @@ Gemfile.lock
54
54
 
55
55
  # Used by RuboCop. Remote config files pulled in from inherit_from directive.
56
56
  # .rubocop-https?--*
57
+
58
+
59
+ # OSX Files
60
+ .DS_Store
61
+
62
+ # rspec examples
63
+ spec/examples.txt
@@ -1,3 +1,20 @@
1
+ ## 0.22.1
2
+ * Fix ZIP parser so it no longer raises an error for ZIP files that have an invalid central directory
3
+
4
+ ## 0.22.0
5
+ * Adds option `stringify_keys: true` to #as_json methods (fix #151)
6
+
7
+ ## 0.21.1
8
+ * MPEG: Ensure parsing does not inadvertently return an Integer instead of Result|nil
9
+ * MPEG: Scan further into the MPEG file than previously (scan 32 1KB chunks)
10
+ * MPEG: Ensure the parser does not raise an exception when there is no data to read for scanning beyond the initial header
11
+
12
+ ## 0.21.0
13
+ * Adds support for MPEG video files
14
+
15
+ ## 0.20.1
16
+ * Make sure EXIF results work correctly with ActiveSupport JSON encoders
17
+
1
18
  ## 0.20.0
2
19
  * Correctly tag the license on Rubygems as MIT (Hippocratic) for easier audit
3
20
 
@@ -234,4 +234,9 @@ This provision also applies to the test files you include with the changed code
234
234
 
235
235
  ## Changelog
236
236
 
237
- When creating a new release you must add an entry in the `CHANGELOG.md`.
237
+ When creating a new release you must add an entry in the `CHANGELOG.md`.
238
+
239
+ ## Testing locally
240
+
241
+ It's possible to run `exe/format_parser_inspect FILE_NAME` or `exe/format_parser_inspect FILE_URI`
242
+ to test the new code without the necessity of installing the gem.
data/README.md CHANGED
@@ -31,6 +31,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
31
31
  * ZIP
32
32
  * DOCX, PPTX, XLSX
33
33
  * OGG
34
+ * MPEG, MPG
34
35
 
35
36
  ...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
36
37
 
@@ -74,6 +75,17 @@ img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
74
75
  JSON.pretty_generate(img_info) #=> ...
75
76
  ```
76
77
 
78
+ To convert the result to a Hash or a structure suitable for JSON serialization
79
+
80
+ ```ruby
81
+ img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
82
+ img_info.as_json
83
+
84
+ # it's also possible to convert all keys to string
85
+ img_info.as_json(stringify_keys: true)
86
+ ```
87
+
88
+
77
89
  ## Creating your own parsers
78
90
 
79
91
  See the [section on writing parsers in CONTRIBUTING.md](CONTRIBUTING.md#so-you-want-to-contribute-a-new-parser)
@@ -173,6 +185,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
173
185
  ### .docx
174
186
  - The .docx files were generated by the project maintainers
175
187
 
188
+ ### .mpg and .mpeg
189
+ - The files (video 1 to 4) were downloaded from https://standaloneinstaller.com/blog/big-list-of-sample-videos-for-testers-124.html.
190
+ - Video 5 was downloaded from https://archive.org/details/ligouHDR-HC1_sample1.
191
+
176
192
  ### JPEG examples of EXIF orientation
177
193
  - Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
178
194
  manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
@@ -183,7 +199,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
183
199
 
184
200
  ## Copyright
185
201
 
186
- Copyright (c) 2019 WeTransfer.
202
+ Copyright (c) 2020 WeTransfer.
187
203
 
188
204
  `format_parser` is distributed under the conditions of the [Hippocratic License](https://firstdonoharm.dev/version/1/2/license.html)
189
205
  - See LICENSE.txt for further details.
@@ -39,7 +39,6 @@ Gem::Specification.new do |spec|
39
39
  spec.add_development_dependency 'rspec', '~> 3.0'
40
40
  spec.add_development_dependency 'rake', '~> 12'
41
41
  spec.add_development_dependency 'simplecov', '~> 0.15'
42
- spec.add_development_dependency 'pry', '~> 0.11'
43
42
  spec.add_development_dependency 'yard', '~> 0.9'
44
43
  spec.add_development_dependency 'wetransfer_style', '0.5.0'
45
44
  spec.add_development_dependency 'parallel_tests'
@@ -15,7 +15,12 @@ module FormatParser::AttributesJSON
15
15
 
16
16
  # Implements a sane default `as_json` for an object
17
17
  # that accessors defined
18
- def as_json(root: false)
18
+ #
19
+ # @param root[Bool] if true, it surrounds the result in a hash with a key
20
+ # `format_parser_file_info`
21
+ # @param stringify_keys[Bool] if true, it transforms all the hash keys to a string.
22
+ # The default value is false for backward compatibility
23
+ def as_json(root: false, stringify_keys: false, **)
19
24
  h = {}
20
25
  h['nature'] = nature if respond_to?(:nature) # Needed for file info structs
21
26
  methods.grep(/\w\=$/).each_with_object(h) do |attr_writer_method_name, h|
@@ -27,6 +32,9 @@ module FormatParser::AttributesJSON
27
32
  sanitized_value = _sanitize_json_value(unwrapped_attribute_value)
28
33
  h[reader_method_name] = sanitized_value
29
34
  end
35
+
36
+ h = FormatParser::HashUtils.deep_transform_keys(h, &:to_s) if stringify_keys
37
+
30
38
  if root
31
39
  {'format_parser_file_info' => h}
32
40
  else
@@ -5,6 +5,7 @@ require 'measurometer'
5
5
  # top-level methods of the library.
6
6
  module FormatParser
7
7
  require_relative 'format_parser/version'
8
+ require_relative 'hash_utils'
8
9
  require_relative 'attributes_json'
9
10
  require_relative 'image'
10
11
  require_relative 'audio'
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.20.0'
2
+ VERSION = '0.22.1'
3
3
  end
@@ -0,0 +1,19 @@
1
+ # based on https://github.com/rails/rails/blob/master/activesupport/lib/active_support/core_ext/hash/keys.rb#L116
2
+ # I chose to copy this method instead of adding activesupport as a dependency
3
+ # because we want to have the least number of dependencies
4
+ module FormatParser
5
+ class HashUtils
6
+ def self.deep_transform_keys(object, &block)
7
+ case object
8
+ when Hash
9
+ object.each_with_object({}) do |(key, value), result|
10
+ result[yield(key)] = deep_transform_keys(value, &block)
11
+ end
12
+ when Array
13
+ object.map { |e| deep_transform_keys(e, &block) }
14
+ else
15
+ object
16
+ end
17
+ end
18
+ end
19
+ end
@@ -100,17 +100,7 @@ module FormatParser::EXIFParser
100
100
  end
101
101
 
102
102
  def to_json(*maybe_coder)
103
- # Let EXIF tags that come later overwrite the properties from the tags
104
- # that come earlier
105
- overlay = @multiple_exif_results.each_with_object({}) do |one_exif_frame, h|
106
- h.merge!(one_exif_frame.to_hash)
107
- end
108
- # Overwrite the orientation with our custom method implementation, because
109
- # it does reject 0-values.
110
- overlay[:orientation] = orientation
111
-
112
- sanitized = FormatParser::AttributesJSON._sanitize_json_value(overlay)
113
- sanitized.to_json(*maybe_coder)
103
+ to_hash.to_json(*maybe_coder)
114
104
  end
115
105
 
116
106
  def orientation_sym
@@ -135,10 +125,27 @@ module FormatParser::EXIFParser
135
125
  0 # If none were found - the orientation is unknown
136
126
  end
137
127
 
128
+ # ActiveSupport will attempt to call #to_hash first, and
129
+ # #to_hash is a decent default implementation to have
130
+ def to_hash
131
+ # Let EXIF tags that come later overwrite the properties from the tags
132
+ # that come earlier
133
+ overlay = @multiple_exif_results.each_with_object({}) do |one_exif_frame, h|
134
+ h.merge!(one_exif_frame.to_hash)
135
+ end
136
+ # Overwrite the orientation with our custom method implementation, because
137
+ # it does reject 0-values.
138
+ overlay[:orientation] = orientation
139
+
140
+ FormatParser::AttributesJSON._sanitize_json_value(overlay)
141
+ end
142
+
138
143
  private
139
144
 
140
- def respond_to_missing?(method_name)
141
- @multiple_exif_results.last.respond_to?(method_name)
145
+ # respond_to_missing? accepts 2 arguments: the method name symbol
146
+ # and whether the method being looked up can be private or not
147
+ def respond_to_missing?(method_name, include_private_methods)
148
+ @multiple_exif_results.last.respond_to?(method_name, include_private_methods)
142
149
  end
143
150
 
144
151
  def method_missing(*a)
@@ -47,10 +47,6 @@ class FormatParser::MP3Parser
47
47
  h[k] = value if value
48
48
  end
49
49
  end
50
-
51
- def as_json(*)
52
- to_h
53
- end
54
50
  end
55
51
 
56
52
  def likely_match?(filename)
@@ -0,0 +1,127 @@
1
+
2
+ # MPEG Headers documentation:
3
+ # http://dvd.sourceforge.net/dvdinfo/mpeghdrs.html#seq
4
+ # http://www.cs.columbia.edu/~delbert/docs/Dueck%20--%20MPEG-2%20Video%20Transcoding.pdf
5
+ # Useful tool to check the file information: https://www.metadata2go.com/
6
+ class FormatParser::MPEGParser
7
+ extend FormatParser::IOUtils
8
+
9
+ ASPECT_RATIOS = {
10
+ 1 => '1:1',
11
+ 2 => '4:3',
12
+ 3 => '16:9',
13
+ 4 => '2.21:1'
14
+ }
15
+
16
+ FRAME_RATES = {
17
+ 1 => '23.976',
18
+ 2 => '24',
19
+ 3 => '25',
20
+ 4 => '29.97',
21
+ 5 => '30',
22
+ 6 => '50',
23
+ 7 => '59.94',
24
+ 8 => '60'
25
+ }
26
+
27
+ PACK_HEADER_START_CODE = [0x00, 0x00, 0x01, 0xBA].pack('C*')
28
+ SEQUENCE_HEADER_START_CODE = [0xB3].pack('C*')
29
+ MAX_BLOCK_READS = 32
30
+ BYTES_TO_READ_PER_READ = 1024
31
+
32
+ def self.likely_match?(filename)
33
+ filename =~ /\.(mpg|mpeg)$/i
34
+ end
35
+
36
+ def self.call(io)
37
+ return unless matches_mpeg_header?(io)
38
+
39
+ # We are looping through the stream because there can be several sequence headers and some of them are not useful.
40
+ # If we detect that the header is not useful, then we look for the next one, for at most MAX_BLOCK_READS iterations.
41
+ # If we reach the EOF, then the mpg is likely to be corrupted and we return nil
42
+ MAX_BLOCK_READS.times do
43
+ next unless pos = find_next_header_code_pos(io)
44
+ io.seek(pos + 1)
45
+ horizontal_size, vertical_size = parse_image_size(io)
46
+ ratio_code, rate_code = parse_rate_information(io)
47
+ if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
48
+ return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
49
+ end
50
+ end
51
+ nil # otherwise the return value of Integer#times will be returned
52
+ rescue FormatParser::IOUtils::InvalidRead
53
+ nil
54
+ end
55
+
56
+ def self.file_info(width_px, height_px, ratio_code, rate_code)
57
+ FormatParser::Video.new(
58
+ format: :mpg,
59
+ width_px: width_px,
60
+ height_px: height_px,
61
+ intrinsics: {
62
+ aspect_ratio: ASPECT_RATIOS.fetch(ratio_code),
63
+ frame_rate: FRAME_RATES.fetch(rate_code)
64
+ },
65
+ )
66
+ end
67
+
68
+ # The 3 bytes following the sequence header code give us information about the px size
69
+ # 1.5 bytes (12 bits) for horizontal size and 1.5 bytes for vertical size
70
+ def self.parse_image_size(io)
71
+ image_size = convert_3_bytes_to_bits(safe_read(io, 3))
72
+ [read_first_12_bits(image_size), read_last_12_bits(image_size)]
73
+ end
74
+
75
+ # The following byte gives us information about the aspect ratio and frame rate
76
+ # 4 bits corresponds to the aspect ratio and 4 bits to the frame rate code
77
+ def self.parse_rate_information(io)
78
+ rate_information = safe_read(io, 1).unpack('C').first
79
+ [read_first_4_bits(rate_information), read_last_4_bits(rate_information)]
80
+ end
81
+
82
+ def self.valid_aspect_ratio_code?(ratio_code)
83
+ ASPECT_RATIOS.include?(ratio_code)
84
+ end
85
+
86
+ def self.valid_frame_rate_code?(rate_code)
87
+ FRAME_RATES.include?(rate_code)
88
+ end
89
+
90
+ # Returns the position of the next sequence package content in the stream
91
+ # This method reads a single chunk of BYTES_TO_READ_PER_READ bytes per call; the caller repeats it up to MAX_BLOCK_READS times
92
+ # If the package is not found, then it returns nil.
93
+ def self.find_next_header_code_pos(io)
94
+ pos_before_read = io.pos
95
+ bin_str = io.read(BYTES_TO_READ_PER_READ) # bin_str might be nil if we are at EOF
96
+ header_relative_index = bin_str && bin_str.index(SEQUENCE_HEADER_START_CODE)
97
+ return pos_before_read + header_relative_index if header_relative_index
98
+ end
99
+
100
+ # If the first 4 bytes of the stream are equal to 00 00 01 BA, the pack start code for the Pack Header, then it's an MPEG file.
101
+ def self.matches_mpeg_header?(io)
102
+ safe_read(io, 4) == PACK_HEADER_START_CODE
103
+ end
104
+
105
+ def self.convert_3_bytes_to_bits(bytes)
106
+ bytes = bytes.unpack('CCC')
107
+ (bytes[0] << 16) | (bytes[1] << 8) | (bytes[2])
108
+ end
109
+
110
+ def self.read_first_12_bits(bits)
111
+ bits >> 12 & 0x0fff
112
+ end
113
+
114
+ def self.read_last_12_bits(bits)
115
+ bits & 0x0fff
116
+ end
117
+
118
+ def self.read_first_4_bits(byte)
119
+ byte >> 4
120
+ end
121
+
122
+ def self.read_last_4_bits(byte)
123
+ byte & 0x0F
124
+ end
125
+
126
+ FormatParser.register_parser self, natures: [:video], formats: [:mpg, :mpeg]
127
+ end
@@ -18,6 +18,7 @@ class FormatParser::ZIPParser::FileReader
18
18
  'Could not find the EOCD signature in the buffer - maybe a malformed ZIP file'
19
19
  end
20
20
  end
21
+ InvalidCentralDirectory = Class.new(Error)
21
22
 
22
23
  C_UINT32LE = 'V'
23
24
  C_UINT16LE = 'v'
@@ -175,6 +176,8 @@ class FormatParser::ZIPParser::FileReader
175
176
  # BUT! in format_parser we avoid unbounded reads, as a matter of fact they are forbidden.
176
177
  # So we will again limit ourselves to cdir_size, and we will take a cushion of 1 KB.
177
178
  central_directory_str = io.read(cdir_size + 1024)
179
+ raise InvalidCentralDirectory if central_directory_str.nil?
180
+
178
181
  central_directory_io = StringIO.new(central_directory_str)
179
182
  log do
180
183
  format(
@@ -140,4 +140,30 @@ describe FormatParser::AttributesJSON do
140
140
  JSON.pretty_generate(object_with_attributes_module)
141
141
  }.to raise_error(/structure too deep/)
142
142
  end
143
+
144
+ it 'converts all hash keys to string when stringify_keys: true' do
145
+ fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
146
+ fi_io = File.open(fixture_path, 'rb')
147
+
148
+ result = FormatParser::ZIPParser.new.call(fi_io).as_json(stringify_keys: true)
149
+
150
+ result['entries'].each do |entry|
151
+ entry.each do |key, _value|
152
+ expect(key).to be_a(String)
153
+ end
154
+ end
155
+ end
156
+
157
+ it 'does not convert hash keys to string when stringify_keys: false' do
158
+ fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
159
+ fi_io = File.open(fixture_path, 'rb')
160
+
161
+ result = FormatParser::ZIPParser.new.call(fi_io).as_json
162
+
163
+ result['entries'].each do |entry|
164
+ entry.each do |key, _value|
165
+ expect(key).to be_a(Symbol)
166
+ end
167
+ end
168
+ end
143
169
  end
@@ -0,0 +1,42 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::HashUtils do
4
+ describe '.deep_transform_keys' do
5
+ it 'transforms all the keys in a hash' do
6
+ hash = { aa: 1, 'bb' => 2 }
7
+ result = described_class.deep_transform_keys(hash, &:to_s)
8
+
9
+ expect(result).to eq('aa' => 1, 'bb' => 2)
10
+ end
11
+
12
+ it 'transforms all the keys in a array of hashes' do
13
+ array = [{ aa: 1, bb: 2 }, { cc: 3, dd: [{c: 2, d: 3}] }]
14
+ result = described_class.deep_transform_keys(array, &:to_s)
15
+
16
+ expect(result).to eq(
17
+ [{'aa' => 1, 'bb' => 2}, {'cc' => 3, 'dd' => [{'c' => 2, 'd' => 3}]}]
18
+ )
19
+ end
20
+
21
+ it 'transforms all the keys in a hash recursively' do
22
+ hash = { aa: 1, bb: { cc: 22, dd: 3 } }
23
+ result = described_class.deep_transform_keys(hash, &:to_s)
24
+
25
+ expect(result).to eq('aa' => 1, 'bb' => { 'cc' => 22, 'dd' => 3})
26
+ end
27
+
28
+ it 'does nothing for an non array/hash object' do
29
+ object = Object.new
30
+ result = described_class.deep_transform_keys(object, &:to_s)
31
+
32
+ expect(result).to eq(object)
33
+ end
34
+
35
+ it 'returns the last value if different keys are transformed into the same one' do
36
+ hash = { aa: 0, 'bb' => 2, bb: 1 }
37
+ result = described_class.deep_transform_keys(hash, &:to_s)
38
+
39
+ expect(result).to eq('aa' => 0, 'bb' => 1)
40
+ end
41
+ end
42
+ end
@@ -14,6 +14,34 @@ describe FormatParser::EXIFParser do
14
14
  end
15
15
  end
16
16
 
17
+ describe 'EXIFStack' do
18
+ it 'supports respond_to? for methods it does not have' do
19
+ # Peculiar thing: we need to support respond_to?(:to_hash)
20
+ # for compatibility with ActiveSupport JSON output. When you call as_json
21
+ # on an object ActiveSupport implements that as_json method and will then
22
+ # call #as_json on the contained objects as necessary, _or_ call
23
+ # other methods if it thinks it is necessary.
24
+ #
25
+ # Although we _will_ be implementing to_hash specifically
26
+ # the respond_to_missing must be implemented correctly
27
+ stack = FormatParser::EXIFParser::EXIFStack.new([{}, {}])
28
+ expect(stack).not_to respond_to(:no_such_method__at_all)
29
+ end
30
+
31
+ it 'returns a Hash from #to_hash' do
32
+ first_fake_exif = double(orientation: 1, to_hash: {foo: 123, bar: 675})
33
+ second_fake_exif = double(orientation: 4, to_hash: {foo: 245})
34
+
35
+ stack = FormatParser::EXIFParser::EXIFStack.new([first_fake_exif, second_fake_exif])
36
+ stack_as_hash = stack.to_hash
37
+
38
+ # In this instance we DO need an actual type_check, because #to_hash
39
+ # is used by default type coercions in Ruby
40
+ expect(stack_as_hash).to be_kind_of(Hash)
41
+ expect(stack_as_hash).to eq(foo: 245, bar: 675, orientation: 4)
42
+ end
43
+ end
44
+
17
45
  it 'is able to deal with an orientation tag which a tuple value for orientation' do
18
46
  path = fixtures_dir + '/EXIF/double_orientation.exif.bin'
19
47
  exif_data = File.open(path, 'rb') do |f|
@@ -110,4 +110,32 @@ describe FormatParser::MP3Parser do
110
110
  subject.call(StringIO.new(''))
111
111
  }.to raise_error(FormatParser::IOUtils::InvalidRead)
112
112
  end
113
+
114
+ describe '#as_json' do
115
+ it 'converts all hash keys to string when stringify_keys: true' do
116
+ fpath = fixtures_dir + '/MP3/Cassy.mp3'
117
+ result = subject.call(File.open(fpath, 'rb')).as_json(stringify_keys: true)
118
+
119
+ expect(
120
+ result['intrinsics'].keys.map(&:class).uniq
121
+ ).to eq([String])
122
+
123
+ expect(
124
+ result['intrinsics']['id3tags'].map(&:class).uniq
125
+ ).to eq([ID3Tag::Tag])
126
+ end
127
+
128
+ it 'does not convert the hash keys to string when stringify_keys: false' do
129
+ fpath = fixtures_dir + '/MP3/Cassy.mp3'
130
+ result = subject.call(File.open(fpath, 'rb')).as_json
131
+
132
+ expect(
133
+ result['intrinsics'].keys.map(&:class).uniq
134
+ ).to eq([Symbol])
135
+
136
+ expect(
137
+ result['intrinsics'][:id3tags].map(&:class).uniq
138
+ ).to eq([ID3Tag::Tag])
139
+ end
140
+ end
113
141
  end
@@ -0,0 +1,85 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::MPEGParser do
4
+ it 'parses a first example mpg file' do
5
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video1.mpg', 'rb'))
6
+
7
+ expect(parse_result.nature).to eq(:video)
8
+ expect(parse_result.format).to eq(:mpg)
9
+ expect(parse_result.width_px).to eq(560)
10
+ expect(parse_result.height_px).to eq(320)
11
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('1:1')
12
+ expect(parse_result.intrinsics[:frame_rate]).to eq('30')
13
+ end
14
+
15
+ it 'returns a nil if it is necessary to iterate over a very large number of bytes and the requisite sequences are not detected' do
16
+ bytes_buffer = StringIO.new
17
+ bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
18
+ zero_bytes = [0x00].pack('C') * (1024 * 1024 * 5)
19
+ bytes_buffer.write(zero_bytes)
20
+
21
+ bytes_buffer.rewind
22
+
23
+ parse_result = described_class.call(bytes_buffer)
24
+ expect(parse_result).to be_nil
25
+ end
26
+
27
+ it 'returns a nil if the IO only contains the MPEG header bytes at the start and nothing else' do
28
+ bytes_buffer = StringIO.new
29
+ bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
30
+ bytes_buffer.rewind
31
+
32
+ parse_result = described_class.call(bytes_buffer)
33
+ expect(parse_result).to be_nil
34
+ end
35
+
36
+ it 'parses a file with mpeg extension' do
37
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video2.mpeg', 'rb'))
38
+
39
+ expect(parse_result.nature).to eq(:video)
40
+ expect(parse_result.format).to eq(:mpg)
41
+ expect(parse_result.width_px).to eq(720)
42
+ expect(parse_result.height_px).to eq(480)
43
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
44
+ expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
45
+ end
46
+
47
+ it 'parses a second example mpg file' do
48
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video3.mpg', 'rb'))
49
+
50
+ expect(parse_result.nature).to eq(:video)
51
+ expect(parse_result.format).to eq(:mpg)
52
+ expect(parse_result.width_px).to eq(720)
53
+ expect(parse_result.height_px).to eq(496)
54
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
55
+ expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
56
+ end
57
+
58
+ it 'parses a bigger mpg file' do
59
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video4.mpg', 'rb'))
60
+
61
+ expect(parse_result.nature).to eq(:video)
62
+ expect(parse_result.format).to eq(:mpg)
63
+ expect(parse_result.width_px).to eq(1920)
64
+ expect(parse_result.height_px).to eq(1080)
65
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
66
+ expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
67
+ end
68
+
69
+ it 'parses a file with different malformed first sequence header' do
70
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video5.mpg', 'rb'))
71
+
72
+ expect(parse_result.nature).to eq(:video)
73
+ expect(parse_result.format).to eq(:mpg)
74
+ expect(parse_result.width_px).to eq(1440)
75
+ expect(parse_result.height_px).to eq(1080)
76
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
77
+ expect(parse_result.intrinsics[:frame_rate]).to eq('25')
78
+ end
79
+
80
+ it 'parses a MP4 file' do
81
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MOOV/MP4/bmff.mp4', 'rb'))
82
+
83
+ expect(parse_result).to be_nil
84
+ end
85
+ end
@@ -103,4 +103,11 @@ describe FormatParser::ZIPParser do
103
103
  expect(first_entry.filename).to eq('Li��nia Extreme//')
104
104
  expect(first_entry.type).to eq(:directory)
105
105
  end
106
+
107
+ it 'is able to handle files with invalid central directory position' do
108
+ invalid_zip_path = fixtures_dir + '/ZIP/invalid_central_directory.zip'
109
+
110
+ expect { subject.call(File.open(invalid_zip_path, 'rb')) }
111
+ .to_not raise_error
112
+ end
106
113
  end
@@ -8,7 +8,6 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
8
8
 
9
9
  require 'rspec'
10
10
  require 'format_parser'
11
- require 'pry'
12
11
 
13
12
  module SpecHelpers
14
13
  def fixtures_dir
@@ -19,6 +18,8 @@ end
19
18
  RSpec.configure do |c|
20
19
  c.include SpecHelpers
21
20
  c.extend SpecHelpers # makes fixtures_dir available for example groups too
21
+ # https://relishapp.com/rspec/rspec-core/docs/command-line/only-failures
22
+ c.example_status_persistence_file_path = 'spec/examples.txt'
22
23
  end
23
24
 
24
25
  RSpec.shared_examples 'an IO object compatible with IOConstraint' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.0
4
+ version: 0.22.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2019-12-13 00:00:00.000000000 Z
12
+ date: 2020-08-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -135,20 +135,6 @@ dependencies:
135
135
  - - "~>"
136
136
  - !ruby/object:Gem::Version
137
137
  version: '0.15'
138
- - !ruby/object:Gem::Dependency
139
- name: pry
140
- requirement: !ruby/object:Gem::Requirement
141
- requirements:
142
- - - "~>"
143
- - !ruby/object:Gem::Version
144
- version: '0.11'
145
- type: :development
146
- prerelease: false
147
- version_requirements: !ruby/object:Gem::Requirement
148
- requirements:
149
- - - "~>"
150
- - !ruby/object:Gem::Version
151
- version: '0.11'
152
138
  - !ruby/object:Gem::Dependency
153
139
  name: yard
154
140
  requirement: !ruby/object:Gem::Requirement
@@ -223,6 +209,7 @@ files:
223
209
  - lib/document.rb
224
210
  - lib/format_parser.rb
225
211
  - lib/format_parser/version.rb
212
+ - lib/hash_utils.rb
226
213
  - lib/image.rb
227
214
  - lib/io_constraint.rb
228
215
  - lib/io_utils.rb
@@ -240,6 +227,7 @@ files:
240
227
  - lib/parsers/moov_parser/decoder.rb
241
228
  - lib/parsers/mp3_parser.rb
242
229
  - lib/parsers/mp3_parser/id3_extraction.rb
230
+ - lib/parsers/mpeg_parser.rb
243
231
  - lib/parsers/ogg_parser.rb
244
232
  - lib/parsers/pdf_parser.rb
245
233
  - lib/parsers/png_parser.rb
@@ -259,6 +247,7 @@ files:
259
247
  - spec/file_information_spec.rb
260
248
  - spec/format_parser_inspect_spec.rb
261
249
  - spec/format_parser_spec.rb
250
+ - spec/hash_utils_spec.rb
262
251
  - spec/io_utils_spec.rb
263
252
  - spec/parsers/aiff_parser_spec.rb
264
253
  - spec/parsers/bmp_parser_spec.rb
@@ -271,6 +260,7 @@ files:
271
260
  - spec/parsers/jpeg_parser_spec.rb
272
261
  - spec/parsers/moov_parser_spec.rb
273
262
  - spec/parsers/mp3_parser_spec.rb
263
+ - spec/parsers/mpeg_parser_spec.rb
274
264
  - spec/parsers/ogg_parser_spec.rb
275
265
  - spec/parsers/pdf_parser_spec.rb
276
266
  - spec/parsers/png_parser_spec.rb
@@ -303,7 +293,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
303
293
  - !ruby/object:Gem::Version
304
294
  version: '0'
305
295
  requirements: []
306
- rubygems_version: 3.0.6
296
+ rubygems_version: 3.0.3
307
297
  signing_key:
308
298
  specification_version: 4
309
299
  summary: A library for efficient parsing of file metadata