format_parser 0.20.0 → 0.22.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +7 -0
- data/CHANGELOG.md +17 -0
- data/CONTRIBUTING.md +6 -1
- data/README.md +17 -1
- data/format_parser.gemspec +0 -1
- data/lib/attributes_json.rb +9 -1
- data/lib/format_parser.rb +1 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/hash_utils.rb +19 -0
- data/lib/parsers/exif_parser.rb +20 -13
- data/lib/parsers/mp3_parser.rb +0 -4
- data/lib/parsers/mpeg_parser.rb +127 -0
- data/lib/parsers/zip_parser/file_reader.rb +3 -0
- data/spec/attributes_json_spec.rb +26 -0
- data/spec/hash_utils_spec.rb +42 -0
- data/spec/parsers/exif_parser_spec.rb +28 -0
- data/spec/parsers/mp3_parser_spec.rb +28 -0
- data/spec/parsers/mpeg_parser_spec.rb +85 -0
- data/spec/parsers/zip_parser_spec.rb +7 -0
- data/spec/spec_helper.rb +2 -1
- metadata +7 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 05470fef0edec68c427b9680cf94a88a8f3286e2279b2daf13e87679324eb061
|
4
|
+
data.tar.gz: f65ed1d390e4e70bd34a3b01b5d54d39e4076d18ebf15fc20196c2746c86122f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6e9e2d1fbfe6e813d1e0d9d2c102ce3f031fd37395cc75807ca7b527b1a2fc4a9a3c41baaf9e2cdaa601063cded92ef446ce913f2210f3d82e063126873d54d
|
7
|
+
data.tar.gz: 86e166b2ac754fe5d7e42e471bcaa70cd54ae0aa22da3e0f85a6223213c2bd5a284f2b53416d03656dc291898837636678dbfba2e32b179a562ea7c8b05ef0bf
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
## 0.22.1
|
2
|
+
* Fix ZIP parser to not raise an error for invalid ZIP files with an invalid central directory
|
3
|
+
|
4
|
+
## 0.22.0
|
5
|
+
* Adds option `stringify_keys: true` to #as_json methods (fix #151)
|
6
|
+
|
7
|
+
## 0.21.1
|
8
|
+
* MPEG: Ensure parsing does not inadvertently return an Integer instead of Result|nil
|
9
|
+
* MPEG: Scan further into the MPEG file than previously (scan 32 1KB chunks)
|
10
|
+
* MPEG: Ensure the parser does not raise an exception when there is no data to read for scanning beyond the initial header
|
11
|
+
|
12
|
+
## 0.21.0
|
13
|
+
* Adds support for MPEG video files
|
14
|
+
|
15
|
+
## 0.20.1
|
16
|
+
* Make sure EXIF results work correctly with ActiveSupport JSON encoders
|
17
|
+
|
1
18
|
## 0.20.0
|
2
19
|
* Correctly tag the license on Rubygems as MIT (Hippocratic) for easier audit
|
3
20
|
|
data/CONTRIBUTING.md
CHANGED
@@ -234,4 +234,9 @@ This provision also applies to the test files you include with the changed code
|
|
234
234
|
|
235
235
|
## Changelog
|
236
236
|
|
237
|
-
When creating a new release you must add an entry in the `CHANGELOG.md`.
|
237
|
+
When creating a new release you must add an entry in the `CHANGELOG.md`.
|
238
|
+
|
239
|
+
## Testing locally
|
240
|
+
|
241
|
+
It's possible to run `exe/format_parser_inspect FILE_NAME` or `exe/format_parser_inspect FILE_URI`
|
242
|
+
to test the new code without the necessity of installing the gem.
|
data/README.md
CHANGED
@@ -31,6 +31,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
31
31
|
* ZIP
|
32
32
|
* DOCX, PPTX, XLSX
|
33
33
|
* OGG
|
34
|
+
* MPEG, MPG
|
34
35
|
|
35
36
|
...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
|
36
37
|
|
@@ -74,6 +75,17 @@ img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
|
74
75
|
JSON.pretty_generate(img_info) #=> ...
|
75
76
|
```
|
76
77
|
|
78
|
+
To convert the result to a Hash or a structure suitable for JSON serialization
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
82
|
+
img_info.as_json
|
83
|
+
|
84
|
+
# it's also possible to convert all keys to string
|
85
|
+
img_info.as_json(stringify_keys: true)
|
86
|
+
```
|
87
|
+
|
88
|
+
|
77
89
|
## Creating your own parsers
|
78
90
|
|
79
91
|
See the [section on writing parsers in CONTRIBUTING.md](CONTRIBUTING.md#so-you-want-to-contribute-a-new-parser)
|
@@ -173,6 +185,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
173
185
|
### .docx
|
174
186
|
- The .docx files were generated by the project maintainers
|
175
187
|
|
188
|
+
### .mpg and .mpeg
|
189
|
+
- The files (video 1 to 4) were downloaded from https://standaloneinstaller.com/blog/big-list-of-sample-videos-for-testers-124.html.
|
190
|
+
- Video 5 was downloaded from https://archive.org/details/ligouHDR-HC1_sample1.
|
191
|
+
|
176
192
|
### JPEG examples of EXIF orientation
|
177
193
|
- Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
|
178
194
|
manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
|
@@ -183,7 +199,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
183
199
|
|
184
200
|
## Copyright
|
185
201
|
|
186
|
-
Copyright (c)
|
202
|
+
Copyright (c) 2020 WeTransfer.
|
187
203
|
|
188
204
|
`format_parser` is distributed under the conditions of the [Hippocratic License](https://firstdonoharm.dev/version/1/2/license.html)
|
189
205
|
- See LICENSE.txt for further details.
|
data/format_parser.gemspec
CHANGED
@@ -39,7 +39,6 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
40
40
|
spec.add_development_dependency 'rake', '~> 12'
|
41
41
|
spec.add_development_dependency 'simplecov', '~> 0.15'
|
42
|
-
spec.add_development_dependency 'pry', '~> 0.11'
|
43
42
|
spec.add_development_dependency 'yard', '~> 0.9'
|
44
43
|
spec.add_development_dependency 'wetransfer_style', '0.5.0'
|
45
44
|
spec.add_development_dependency 'parallel_tests'
|
data/lib/attributes_json.rb
CHANGED
@@ -15,7 +15,12 @@ module FormatParser::AttributesJSON
|
|
15
15
|
|
16
16
|
# Implements a sane default `as_json` for an object
|
17
17
|
# that has accessors defined
|
18
|
-
|
18
|
+
#
|
19
|
+
# @param root[Bool] if true, it surrounds the result in a hash with a key
|
20
|
+
# `format_parser_file_info`
|
21
|
+
# @param stringify_keys[Bool] if true, it transforms all the hash keys to a string.
|
22
|
+
# The default value is false for backward compatibility
|
23
|
+
def as_json(root: false, stringify_keys: false, **)
|
19
24
|
h = {}
|
20
25
|
h['nature'] = nature if respond_to?(:nature) # Needed for file info structs
|
21
26
|
methods.grep(/\w\=$/).each_with_object(h) do |attr_writer_method_name, h|
|
@@ -27,6 +32,9 @@ module FormatParser::AttributesJSON
|
|
27
32
|
sanitized_value = _sanitize_json_value(unwrapped_attribute_value)
|
28
33
|
h[reader_method_name] = sanitized_value
|
29
34
|
end
|
35
|
+
|
36
|
+
h = FormatParser::HashUtils.deep_transform_keys(h, &:to_s) if stringify_keys
|
37
|
+
|
30
38
|
if root
|
31
39
|
{'format_parser_file_info' => h}
|
32
40
|
else
|
data/lib/format_parser.rb
CHANGED
data/lib/hash_utils.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# based on https://github.com/rails/rails/blob/master/activesupport/lib/active_support/core_ext/hash/keys.rb#L116
|
2
|
+
# I chose to copy this method instead of adding activesupport as a dependency
|
3
|
+
# because we want to have the least number of dependencies
|
4
|
+
module FormatParser
|
5
|
+
class HashUtils
|
6
|
+
def self.deep_transform_keys(object, &block)
|
7
|
+
case object
|
8
|
+
when Hash
|
9
|
+
object.each_with_object({}) do |(key, value), result|
|
10
|
+
result[yield(key)] = deep_transform_keys(value, &block)
|
11
|
+
end
|
12
|
+
when Array
|
13
|
+
object.map { |e| deep_transform_keys(e, &block) }
|
14
|
+
else
|
15
|
+
object
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/parsers/exif_parser.rb
CHANGED
@@ -100,17 +100,7 @@ module FormatParser::EXIFParser
|
|
100
100
|
end
|
101
101
|
|
102
102
|
def to_json(*maybe_coder)
|
103
|
-
|
104
|
-
# that come earlier
|
105
|
-
overlay = @multiple_exif_results.each_with_object({}) do |one_exif_frame, h|
|
106
|
-
h.merge!(one_exif_frame.to_hash)
|
107
|
-
end
|
108
|
-
# Overwrite the orientation with our custom method implementation, because
|
109
|
-
# it does reject 0-values.
|
110
|
-
overlay[:orientation] = orientation
|
111
|
-
|
112
|
-
sanitized = FormatParser::AttributesJSON._sanitize_json_value(overlay)
|
113
|
-
sanitized.to_json(*maybe_coder)
|
103
|
+
to_hash.to_json(*maybe_coder)
|
114
104
|
end
|
115
105
|
|
116
106
|
def orientation_sym
|
@@ -135,10 +125,27 @@ module FormatParser::EXIFParser
|
|
135
125
|
0 # If none were found - the orientation is unknown
|
136
126
|
end
|
137
127
|
|
128
|
+
# ActiveSupport will attempt to call #to_hash first, and
|
129
|
+
# #to_hash is a decent default implementation to have
|
130
|
+
def to_hash
|
131
|
+
# Let EXIF tags that come later overwrite the properties from the tags
|
132
|
+
# that come earlier
|
133
|
+
overlay = @multiple_exif_results.each_with_object({}) do |one_exif_frame, h|
|
134
|
+
h.merge!(one_exif_frame.to_hash)
|
135
|
+
end
|
136
|
+
# Overwrite the orientation with our custom method implementation, because
|
137
|
+
# it does reject 0-values.
|
138
|
+
overlay[:orientation] = orientation
|
139
|
+
|
140
|
+
FormatParser::AttributesJSON._sanitize_json_value(overlay)
|
141
|
+
end
|
142
|
+
|
138
143
|
private
|
139
144
|
|
140
|
-
|
141
|
-
|
145
|
+
# respond_to_missing? accepts 2 arguments: the method name symbol
|
146
|
+
# and whether the method being looked up can be private or not
|
147
|
+
def respond_to_missing?(method_name, include_private_methods)
|
148
|
+
@multiple_exif_results.last.respond_to?(method_name, include_private_methods)
|
142
149
|
end
|
143
150
|
|
144
151
|
def method_missing(*a)
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -0,0 +1,127 @@
|
|
1
|
+
|
2
|
+
# MPEG Headers documentation:
|
3
|
+
# http://dvd.sourceforge.net/dvdinfo/mpeghdrs.html#seq
|
4
|
+
# http://www.cs.columbia.edu/~delbert/docs/Dueck%20--%20MPEG-2%20Video%20Transcoding.pdf
|
5
|
+
# Useful tool to check the file information: https://www.metadata2go.com/
|
6
|
+
class FormatParser::MPEGParser
|
7
|
+
extend FormatParser::IOUtils
|
8
|
+
|
9
|
+
ASPECT_RATIOS = {
|
10
|
+
1 => '1:1',
|
11
|
+
2 => '4:3',
|
12
|
+
3 => '16:9',
|
13
|
+
4 => '2.21:1'
|
14
|
+
}
|
15
|
+
|
16
|
+
FRAME_RATES = {
|
17
|
+
1 => '23.976',
|
18
|
+
2 => '24',
|
19
|
+
3 => '25',
|
20
|
+
4 => '29.97',
|
21
|
+
5 => '30',
|
22
|
+
6 => '50',
|
23
|
+
7 => '59.94',
|
24
|
+
8 => '60'
|
25
|
+
}
|
26
|
+
|
27
|
+
PACK_HEADER_START_CODE = [0x00, 0x00, 0x01, 0xBA].pack('C*')
|
28
|
+
SEQUENCE_HEADER_START_CODE = [0xB3].pack('C*')
|
29
|
+
MAX_BLOCK_READS = 32
|
30
|
+
BYTES_TO_READ_PER_READ = 1024
|
31
|
+
|
32
|
+
def self.likely_match?(filename)
|
33
|
+
filename =~ /\.(mpg|mpeg)$/i
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.call(io)
|
37
|
+
return unless matches_mpeg_header?(io)
|
38
|
+
|
39
|
+
# We are looping through the stream because there can be several sequence headers and some of them are not useful.
|
40
|
+
# If we detect that the header is not useful, then we keep looking for the next one, for up to MAX_BLOCK_READS block reads.
|
41
|
+
# If we reach the EOF, then the mpg is likely to be corrupted and we return nil
|
42
|
+
MAX_BLOCK_READS.times do
|
43
|
+
next unless pos = find_next_header_code_pos(io)
|
44
|
+
io.seek(pos + 1)
|
45
|
+
horizontal_size, vertical_size = parse_image_size(io)
|
46
|
+
ratio_code, rate_code = parse_rate_information(io)
|
47
|
+
if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
|
48
|
+
return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
nil # otherwise the return value of Integer#times will be returned
|
52
|
+
rescue FormatParser::IOUtils::InvalidRead
|
53
|
+
nil
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.file_info(width_px, height_px, ratio_code, rate_code)
|
57
|
+
FormatParser::Video.new(
|
58
|
+
format: :mpg,
|
59
|
+
width_px: width_px,
|
60
|
+
height_px: height_px,
|
61
|
+
intrinsics: {
|
62
|
+
aspect_ratio: ASPECT_RATIOS.fetch(ratio_code),
|
63
|
+
frame_rate: FRAME_RATES.fetch(rate_code)
|
64
|
+
},
|
65
|
+
)
|
66
|
+
end
|
67
|
+
|
68
|
+
# The 3 bytes following the sequence header code give us information about the px size
|
69
|
+
# 1.5 bytes (12 bits) for horizontal size and 1.5 bytes for vertical size
|
70
|
+
def self.parse_image_size(io)
|
71
|
+
image_size = convert_3_bytes_to_bits(safe_read(io, 3))
|
72
|
+
[read_first_12_bits(image_size), read_last_12_bits(image_size)]
|
73
|
+
end
|
74
|
+
|
75
|
+
# The following byte gives us information about the aspect ratio and frame rate
|
76
|
+
# 4 bits corresponds to the aspect ratio and 4 bits to the frame rate code
|
77
|
+
def self.parse_rate_information(io)
|
78
|
+
rate_information = safe_read(io, 1).unpack('C').first
|
79
|
+
[read_first_4_bits(rate_information), read_last_4_bits(rate_information)]
|
80
|
+
end
|
81
|
+
|
82
|
+
def self.valid_aspect_ratio_code?(ratio_code)
|
83
|
+
ASPECT_RATIOS.include?(ratio_code)
|
84
|
+
end
|
85
|
+
|
86
|
+
def self.valid_frame_rate_code?(rate_code)
|
87
|
+
FRAME_RATES.include?(rate_code)
|
88
|
+
end
|
89
|
+
|
90
|
+
# Returns the position of the next sequence package content in the stream
|
91
|
+
# Each call reads a single chunk of BYTES_TO_READ_PER_READ bytes; the caller invokes this method up to MAX_BLOCK_READS times
|
92
|
+
# If the package is not found, then it returns nil.
|
93
|
+
def self.find_next_header_code_pos(io)
|
94
|
+
pos_before_read = io.pos
|
95
|
+
bin_str = io.read(BYTES_TO_READ_PER_READ) # bin_str might be nil if we are at EOF
|
96
|
+
header_relative_index = bin_str && bin_str.index(SEQUENCE_HEADER_START_CODE)
|
97
|
+
return pos_before_read + header_relative_index if header_relative_index
|
98
|
+
end
|
99
|
+
|
100
|
+
# If the first 4 bytes of the stream are equal to 00 00 01 BA, the pack start code for the Pack Header, then it's an MPEG file.
|
101
|
+
def self.matches_mpeg_header?(io)
|
102
|
+
safe_read(io, 4) == PACK_HEADER_START_CODE
|
103
|
+
end
|
104
|
+
|
105
|
+
def self.convert_3_bytes_to_bits(bytes)
|
106
|
+
bytes = bytes.unpack('CCC')
|
107
|
+
(bytes[0] << 16) | (bytes[1] << 8) | (bytes[2])
|
108
|
+
end
|
109
|
+
|
110
|
+
def self.read_first_12_bits(bits)
|
111
|
+
bits >> 12 & 0x0fff
|
112
|
+
end
|
113
|
+
|
114
|
+
def self.read_last_12_bits(bits)
|
115
|
+
bits & 0x0fff
|
116
|
+
end
|
117
|
+
|
118
|
+
def self.read_first_4_bits(byte)
|
119
|
+
byte >> 4
|
120
|
+
end
|
121
|
+
|
122
|
+
def self.read_last_4_bits(byte)
|
123
|
+
byte & 0x0F
|
124
|
+
end
|
125
|
+
|
126
|
+
FormatParser.register_parser self, natures: [:video], formats: [:mpg, :mpeg]
|
127
|
+
end
|
@@ -18,6 +18,7 @@ class FormatParser::ZIPParser::FileReader
|
|
18
18
|
'Could not find the EOCD signature in the buffer - maybe a malformed ZIP file'
|
19
19
|
end
|
20
20
|
end
|
21
|
+
InvalidCentralDirectory = Class.new(Error)
|
21
22
|
|
22
23
|
C_UINT32LE = 'V'
|
23
24
|
C_UINT16LE = 'v'
|
@@ -175,6 +176,8 @@ class FormatParser::ZIPParser::FileReader
|
|
175
176
|
# BUT! in format_parser we avoid unbounded reads, as a matter of fact they are forbidden.
|
176
177
|
# So we will again limit ourselves to cdir_size, and we will take a cushion of 1 KB.
|
177
178
|
central_directory_str = io.read(cdir_size + 1024)
|
179
|
+
raise InvalidCentralDirectory if central_directory_str.nil?
|
180
|
+
|
178
181
|
central_directory_io = StringIO.new(central_directory_str)
|
179
182
|
log do
|
180
183
|
format(
|
@@ -140,4 +140,30 @@ describe FormatParser::AttributesJSON do
|
|
140
140
|
JSON.pretty_generate(object_with_attributes_module)
|
141
141
|
}.to raise_error(/structure too deep/)
|
142
142
|
end
|
143
|
+
|
144
|
+
it 'converts all hash keys to string when stringify_keys: true' do
|
145
|
+
fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
|
146
|
+
fi_io = File.open(fixture_path, 'rb')
|
147
|
+
|
148
|
+
result = FormatParser::ZIPParser.new.call(fi_io).as_json(stringify_keys: true)
|
149
|
+
|
150
|
+
result['entries'].each do |entry|
|
151
|
+
entry.each do |key, _value|
|
152
|
+
expect(key).to be_a(String)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
it 'does not convert hash keys to string when stringify_keys: false' do
|
158
|
+
fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
|
159
|
+
fi_io = File.open(fixture_path, 'rb')
|
160
|
+
|
161
|
+
result = FormatParser::ZIPParser.new.call(fi_io).as_json
|
162
|
+
|
163
|
+
result['entries'].each do |entry|
|
164
|
+
entry.each do |key, _value|
|
165
|
+
expect(key).to be_a(Symbol)
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
143
169
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::HashUtils do
|
4
|
+
describe '.deep_transform_keys' do
|
5
|
+
it 'transforms all the keys in a hash' do
|
6
|
+
hash = { aa: 1, 'bb' => 2 }
|
7
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
8
|
+
|
9
|
+
expect(result).to eq('aa' => 1, 'bb' => 2)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'transforms all the keys in a array of hashes' do
|
13
|
+
array = [{ aa: 1, bb: 2 }, { cc: 3, dd: [{c: 2, d: 3}] }]
|
14
|
+
result = described_class.deep_transform_keys(array, &:to_s)
|
15
|
+
|
16
|
+
expect(result).to eq(
|
17
|
+
[{'aa' => 1, 'bb' => 2}, {'cc' => 3, 'dd' => [{'c' => 2, 'd' => 3}]}]
|
18
|
+
)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'transforms all the keys in a hash recursively' do
|
22
|
+
hash = { aa: 1, bb: { cc: 22, dd: 3 } }
|
23
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
24
|
+
|
25
|
+
expect(result).to eq('aa' => 1, 'bb' => { 'cc' => 22, 'dd' => 3})
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'does nothing for an non array/hash object' do
|
29
|
+
object = Object.new
|
30
|
+
result = described_class.deep_transform_keys(object, &:to_s)
|
31
|
+
|
32
|
+
expect(result).to eq(object)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'returns the last value if different keys are transformed into the same one' do
|
36
|
+
hash = { aa: 0, 'bb' => 2, bb: 1 }
|
37
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
38
|
+
|
39
|
+
expect(result).to eq('aa' => 0, 'bb' => 1)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -14,6 +14,34 @@ describe FormatParser::EXIFParser do
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
+
describe 'EXIFStack' do
|
18
|
+
it 'supports respond_to? for methods it does not have' do
|
19
|
+
# Peculiar thing: we need to support respond_to?(:to_hash)
|
20
|
+
# for compatibility with ActiveSupport JSON output. When you call as_json
|
21
|
+
# on an object ActiveSupport implements that as_json method and will then
|
22
|
+
# call #as_json on the contained objects as necessary, _or_ call
|
23
|
+
# other methods if it thinks it is necessary.
|
24
|
+
#
|
25
|
+
# Although we _will_ be implementing to_hash specifically
|
26
|
+
# the respond_to_missing must be implemented correctly
|
27
|
+
stack = FormatParser::EXIFParser::EXIFStack.new([{}, {}])
|
28
|
+
expect(stack).not_to respond_to(:no_such_method__at_all)
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'returns a Hash from #to_hash' do
|
32
|
+
first_fake_exif = double(orientation: 1, to_hash: {foo: 123, bar: 675})
|
33
|
+
second_fake_exif = double(orientation: 4, to_hash: {foo: 245})
|
34
|
+
|
35
|
+
stack = FormatParser::EXIFParser::EXIFStack.new([first_fake_exif, second_fake_exif])
|
36
|
+
stack_as_hash = stack.to_hash
|
37
|
+
|
38
|
+
# In this instance we DO need an actual type_check, because #to_hash
|
39
|
+
# is used by default type coercions in Ruby
|
40
|
+
expect(stack_as_hash).to be_kind_of(Hash)
|
41
|
+
expect(stack_as_hash).to eq(foo: 245, bar: 675, orientation: 4)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
17
45
|
it 'is able to deal with an orientation tag which a tuple value for orientation' do
|
18
46
|
path = fixtures_dir + '/EXIF/double_orientation.exif.bin'
|
19
47
|
exif_data = File.open(path, 'rb') do |f|
|
@@ -110,4 +110,32 @@ describe FormatParser::MP3Parser do
|
|
110
110
|
subject.call(StringIO.new(''))
|
111
111
|
}.to raise_error(FormatParser::IOUtils::InvalidRead)
|
112
112
|
end
|
113
|
+
|
114
|
+
describe '#as_json' do
|
115
|
+
it 'converts all hash keys to string when stringify_keys: true' do
|
116
|
+
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
117
|
+
result = subject.call(File.open(fpath, 'rb')).as_json(stringify_keys: true)
|
118
|
+
|
119
|
+
expect(
|
120
|
+
result['intrinsics'].keys.map(&:class).uniq
|
121
|
+
).to eq([String])
|
122
|
+
|
123
|
+
expect(
|
124
|
+
result['intrinsics']['id3tags'].map(&:class).uniq
|
125
|
+
).to eq([ID3Tag::Tag])
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'does not convert the hash keys to string when stringify_keys: false' do
|
129
|
+
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
130
|
+
result = subject.call(File.open(fpath, 'rb')).as_json
|
131
|
+
|
132
|
+
expect(
|
133
|
+
result['intrinsics'].keys.map(&:class).uniq
|
134
|
+
).to eq([Symbol])
|
135
|
+
|
136
|
+
expect(
|
137
|
+
result['intrinsics'][:id3tags].map(&:class).uniq
|
138
|
+
).to eq([ID3Tag::Tag])
|
139
|
+
end
|
140
|
+
end
|
113
141
|
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::MPEGParser do
|
4
|
+
it 'parses a first example mpg file' do
|
5
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video1.mpg', 'rb'))
|
6
|
+
|
7
|
+
expect(parse_result.nature).to eq(:video)
|
8
|
+
expect(parse_result.format).to eq(:mpg)
|
9
|
+
expect(parse_result.width_px).to eq(560)
|
10
|
+
expect(parse_result.height_px).to eq(320)
|
11
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('1:1')
|
12
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('30')
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'returns a nil if it is necessary to iterate over a very large number of bytes and the requisite sequences are not detected' do
|
16
|
+
bytes_buffer = StringIO.new
|
17
|
+
bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
|
18
|
+
zero_bytes = [0x00].pack('C') * (1024 * 1024 * 5)
|
19
|
+
bytes_buffer.write(zero_bytes)
|
20
|
+
|
21
|
+
bytes_buffer.rewind
|
22
|
+
|
23
|
+
parse_result = described_class.call(bytes_buffer)
|
24
|
+
expect(parse_result).to be_nil
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'returns a nil if the IO only contains the MPEG header bytes at the start and nothing else' do
|
28
|
+
bytes_buffer = StringIO.new
|
29
|
+
bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
|
30
|
+
bytes_buffer.rewind
|
31
|
+
|
32
|
+
parse_result = described_class.call(bytes_buffer)
|
33
|
+
expect(parse_result).to be_nil
|
34
|
+
end
|
35
|
+
|
36
|
+
it 'parses a file with mpeg extension' do
|
37
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video2.mpeg', 'rb'))
|
38
|
+
|
39
|
+
expect(parse_result.nature).to eq(:video)
|
40
|
+
expect(parse_result.format).to eq(:mpg)
|
41
|
+
expect(parse_result.width_px).to eq(720)
|
42
|
+
expect(parse_result.height_px).to eq(480)
|
43
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
|
44
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'parses a second example mpg file' do
|
48
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video3.mpg', 'rb'))
|
49
|
+
|
50
|
+
expect(parse_result.nature).to eq(:video)
|
51
|
+
expect(parse_result.format).to eq(:mpg)
|
52
|
+
expect(parse_result.width_px).to eq(720)
|
53
|
+
expect(parse_result.height_px).to eq(496)
|
54
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
|
55
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'parses a bigger mpg file' do
|
59
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video4.mpg', 'rb'))
|
60
|
+
|
61
|
+
expect(parse_result.nature).to eq(:video)
|
62
|
+
expect(parse_result.format).to eq(:mpg)
|
63
|
+
expect(parse_result.width_px).to eq(1920)
|
64
|
+
expect(parse_result.height_px).to eq(1080)
|
65
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
|
66
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'parses a file with different malformed first sequence header' do
|
70
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video5.mpg', 'rb'))
|
71
|
+
|
72
|
+
expect(parse_result.nature).to eq(:video)
|
73
|
+
expect(parse_result.format).to eq(:mpg)
|
74
|
+
expect(parse_result.width_px).to eq(1440)
|
75
|
+
expect(parse_result.height_px).to eq(1080)
|
76
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
|
77
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('25')
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'parses a MP4 file' do
|
81
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MOOV/MP4/bmff.mp4', 'rb'))
|
82
|
+
|
83
|
+
expect(parse_result).to be_nil
|
84
|
+
end
|
85
|
+
end
|
@@ -103,4 +103,11 @@ describe FormatParser::ZIPParser do
|
|
103
103
|
expect(first_entry.filename).to eq('Li��nia Extreme//')
|
104
104
|
expect(first_entry.type).to eq(:directory)
|
105
105
|
end
|
106
|
+
|
107
|
+
it 'is able to handle files with invalid central directory position' do
|
108
|
+
invalid_zip_path = fixtures_dir + '/ZIP/invalid_central_directory.zip'
|
109
|
+
|
110
|
+
expect { subject.call(File.open(invalid_zip_path, 'rb')) }
|
111
|
+
.to_not raise_error
|
112
|
+
end
|
106
113
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -8,7 +8,6 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
8
|
|
9
9
|
require 'rspec'
|
10
10
|
require 'format_parser'
|
11
|
-
require 'pry'
|
12
11
|
|
13
12
|
module SpecHelpers
|
14
13
|
def fixtures_dir
|
@@ -19,6 +18,8 @@ end
|
|
19
18
|
RSpec.configure do |c|
|
20
19
|
c.include SpecHelpers
|
21
20
|
c.extend SpecHelpers # makes fixtures_dir available for example groups too
|
21
|
+
# https://relishapp.com/rspec/rspec-core/docs/command-line/only-failures
|
22
|
+
c.example_status_persistence_file_path = 'spec/examples.txt'
|
22
23
|
end
|
23
24
|
|
24
25
|
RSpec.shared_examples 'an IO object compatible with IOConstraint' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.22.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-08-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -135,20 +135,6 @@ dependencies:
|
|
135
135
|
- - "~>"
|
136
136
|
- !ruby/object:Gem::Version
|
137
137
|
version: '0.15'
|
138
|
-
- !ruby/object:Gem::Dependency
|
139
|
-
name: pry
|
140
|
-
requirement: !ruby/object:Gem::Requirement
|
141
|
-
requirements:
|
142
|
-
- - "~>"
|
143
|
-
- !ruby/object:Gem::Version
|
144
|
-
version: '0.11'
|
145
|
-
type: :development
|
146
|
-
prerelease: false
|
147
|
-
version_requirements: !ruby/object:Gem::Requirement
|
148
|
-
requirements:
|
149
|
-
- - "~>"
|
150
|
-
- !ruby/object:Gem::Version
|
151
|
-
version: '0.11'
|
152
138
|
- !ruby/object:Gem::Dependency
|
153
139
|
name: yard
|
154
140
|
requirement: !ruby/object:Gem::Requirement
|
@@ -223,6 +209,7 @@ files:
|
|
223
209
|
- lib/document.rb
|
224
210
|
- lib/format_parser.rb
|
225
211
|
- lib/format_parser/version.rb
|
212
|
+
- lib/hash_utils.rb
|
226
213
|
- lib/image.rb
|
227
214
|
- lib/io_constraint.rb
|
228
215
|
- lib/io_utils.rb
|
@@ -240,6 +227,7 @@ files:
|
|
240
227
|
- lib/parsers/moov_parser/decoder.rb
|
241
228
|
- lib/parsers/mp3_parser.rb
|
242
229
|
- lib/parsers/mp3_parser/id3_extraction.rb
|
230
|
+
- lib/parsers/mpeg_parser.rb
|
243
231
|
- lib/parsers/ogg_parser.rb
|
244
232
|
- lib/parsers/pdf_parser.rb
|
245
233
|
- lib/parsers/png_parser.rb
|
@@ -259,6 +247,7 @@ files:
|
|
259
247
|
- spec/file_information_spec.rb
|
260
248
|
- spec/format_parser_inspect_spec.rb
|
261
249
|
- spec/format_parser_spec.rb
|
250
|
+
- spec/hash_utils_spec.rb
|
262
251
|
- spec/io_utils_spec.rb
|
263
252
|
- spec/parsers/aiff_parser_spec.rb
|
264
253
|
- spec/parsers/bmp_parser_spec.rb
|
@@ -271,6 +260,7 @@ files:
|
|
271
260
|
- spec/parsers/jpeg_parser_spec.rb
|
272
261
|
- spec/parsers/moov_parser_spec.rb
|
273
262
|
- spec/parsers/mp3_parser_spec.rb
|
263
|
+
- spec/parsers/mpeg_parser_spec.rb
|
274
264
|
- spec/parsers/ogg_parser_spec.rb
|
275
265
|
- spec/parsers/pdf_parser_spec.rb
|
276
266
|
- spec/parsers/png_parser_spec.rb
|
@@ -303,7 +293,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
303
293
|
- !ruby/object:Gem::Version
|
304
294
|
version: '0'
|
305
295
|
requirements: []
|
306
|
-
rubygems_version: 3.0.
|
296
|
+
rubygems_version: 3.0.3
|
307
297
|
signing_key:
|
308
298
|
specification_version: 4
|
309
299
|
summary: A library for efficient parsing of file metadata
|