format_parser 0.19.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +7 -0
- data/CHANGELOG.md +17 -0
- data/README.md +17 -1
- data/format_parser.gemspec +1 -2
- data/lib/attributes_json.rb +9 -1
- data/lib/format_parser.rb +1 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/hash_utils.rb +19 -0
- data/lib/parsers/exif_parser.rb +20 -13
- data/lib/parsers/mp3_parser.rb +0 -4
- data/lib/parsers/mpeg_parser.rb +127 -0
- data/spec/attributes_json_spec.rb +26 -0
- data/spec/hash_utils_spec.rb +42 -0
- data/spec/parsers/exif_parser_spec.rb +28 -0
- data/spec/parsers/mp3_parser_spec.rb +28 -0
- data/spec/parsers/mpeg_parser_spec.rb +85 -0
- data/spec/spec_helper.rb +2 -1
- metadata +8 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13d1f2a3748d62b027f80b1d6c46f8d087ddfc9cbeadeb62b330fa397797b847
|
4
|
+
data.tar.gz: eea08816482a939538aa1400ba6ca29af864f33ac12d960cbad8806d0e13a9c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d21c310453155285236e3469ad9980df4d408b00b989fa5a9ed330f39c0c4f5a65ebfb84a79d7f9d7c9761c9dbf27958b8b44a5e8176f79cbf340b011279a0c0
|
7
|
+
data.tar.gz: 0e8949ac9c1ac6624f27539fe33f52c28c7e5b9c2cce83d9c752372653e35c0231d9a087cff88c9ea87585eaa7dcd8d6878a1345b5b914554d4a5691f11dce60
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
## 0.22.0
|
2
|
+
* Adds option `stringify_keys: true` to #as_json methods (fix #151)
|
3
|
+
|
4
|
+
## 0.21.1
|
5
|
+
* MPEG: Ensure parsing does not inadvertently return an Integer instead of Result|nil
|
6
|
+
* MPEG: Scan further into the MPEG file than previously (scan 32 1KB chunks)
|
7
|
+
* MPEG: Ensure the parser does not raise an exception when there is no data to read for scanning beyond the initial header
|
8
|
+
|
9
|
+
## 0.21.0
|
10
|
+
* Adds support for MPEG video files
|
11
|
+
|
12
|
+
## 0.20.1
|
13
|
+
* Make sure EXIF results work correctly with ActiveSupport JSON encoders
|
14
|
+
|
15
|
+
## 0.20.0
|
16
|
+
* Correctly tag the license on Rubygems as MIT (Hippocratic) for easier audit
|
17
|
+
|
1
18
|
## 0.19.0
|
2
19
|
* Improve handling of Sony ARW files (make sure the width/height is correctly recognized)
|
3
20
|
* Update Travis matrix and gitignore
|
data/README.md
CHANGED
@@ -31,6 +31,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
31
31
|
* ZIP
|
32
32
|
* DOCX, PPTX, XLSX
|
33
33
|
* OGG
|
34
|
+
* MPEG, MPG
|
34
35
|
|
35
36
|
...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
|
36
37
|
|
@@ -74,6 +75,17 @@ img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
|
74
75
|
JSON.pretty_generate(img_info) #=> ...
|
75
76
|
```
|
76
77
|
|
78
|
+
To convert the result to a Hash or a structure suitable for JSON serialization
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
img_info = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
82
|
+
img_info.as_json
|
83
|
+
|
84
|
+
# it's also possible to convert all keys to string
|
85
|
+
img_info.as_json(stringify_keys: true)
|
86
|
+
```
|
87
|
+
|
88
|
+
|
77
89
|
## Creating your own parsers
|
78
90
|
|
79
91
|
See the [section on writing parsers in CONTRIBUTING.md](CONTRIBUTING.md#so-you-want-to-contribute-a-new-parser)
|
@@ -173,6 +185,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
173
185
|
### .docx
|
174
186
|
- The .docx files were generated by the project maintainers
|
175
187
|
|
188
|
+
### .mpg and .mpeg
|
189
|
+
- The files (video 1 to 4) were downloaded from https://standaloneinstaller.com/blog/big-list-of-sample-videos-for-testers-124.html.
|
190
|
+
- Video 5 was downloaded from https://archive.org/details/ligouHDR-HC1_sample1.
|
191
|
+
|
176
192
|
### JPEG examples of EXIF orientation
|
177
193
|
- Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
|
178
194
|
manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
|
@@ -183,7 +199,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
183
199
|
|
184
200
|
## Copyright
|
185
201
|
|
186
|
-
Copyright (c)
|
202
|
+
Copyright (c) 2020 WeTransfer.
|
187
203
|
|
188
204
|
`format_parser` is distributed under the conditions of the [Hippocratic License](https://firstdonoharm.dev/version/1/2/license.html)
|
189
205
|
- See LICENSE.txt for further details.
|
data/format_parser.gemspec
CHANGED
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
|
|
14
14
|
a number of parser modules that try to recover metadata useful for post-processing and layout while reading the absolute
|
15
15
|
minimum amount of data possible."
|
16
16
|
spec.homepage = 'https://github.com/WeTransfer/format_parser'
|
17
|
-
spec.license = 'MIT'
|
17
|
+
spec.license = 'MIT (Hippocratic)'
|
18
18
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
19
19
|
if spec.respond_to?(:metadata)
|
20
20
|
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
@@ -39,7 +39,6 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
40
40
|
spec.add_development_dependency 'rake', '~> 12'
|
41
41
|
spec.add_development_dependency 'simplecov', '~> 0.15'
|
42
|
-
spec.add_development_dependency 'pry', '~> 0.11'
|
43
42
|
spec.add_development_dependency 'yard', '~> 0.9'
|
44
43
|
spec.add_development_dependency 'wetransfer_style', '0.5.0'
|
45
44
|
spec.add_development_dependency 'parallel_tests'
|
data/lib/attributes_json.rb
CHANGED
@@ -15,7 +15,12 @@ module FormatParser::AttributesJSON
|
|
15
15
|
|
16
16
|
# Implements a sane default `as_json` for an object
|
17
17
|
# that has accessors defined
|
18
|
-
|
18
|
+
#
|
19
|
+
# @param root[Bool] if true, it surrounds the result in a hash with a key
|
20
|
+
# `format_parser_file_info`
|
21
|
+
# @param stringify_keys[Bool] if true, it transforms all the hash keys to a string.
|
22
|
+
# The default value is false for backward compatibility
|
23
|
+
def as_json(root: false, stringify_keys: false, **)
|
19
24
|
h = {}
|
20
25
|
h['nature'] = nature if respond_to?(:nature) # Needed for file info structs
|
21
26
|
methods.grep(/\w\=$/).each_with_object(h) do |attr_writer_method_name, h|
|
@@ -27,6 +32,9 @@ module FormatParser::AttributesJSON
|
|
27
32
|
sanitized_value = _sanitize_json_value(unwrapped_attribute_value)
|
28
33
|
h[reader_method_name] = sanitized_value
|
29
34
|
end
|
35
|
+
|
36
|
+
h = FormatParser::HashUtils.deep_transform_keys(h, &:to_s) if stringify_keys
|
37
|
+
|
30
38
|
if root
|
31
39
|
{'format_parser_file_info' => h}
|
32
40
|
else
|
data/lib/format_parser.rb
CHANGED
data/lib/hash_utils.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# based on https://github.com/rails/rails/blob/master/activesupport/lib/active_support/core_ext/hash/keys.rb#L116
|
2
|
+
# I chose to copy this method instead of adding activesupport as a dependency
|
3
|
+
# because we want to have the least number of dependencies
|
4
|
+
module FormatParser
|
5
|
+
class HashUtils
|
6
|
+
def self.deep_transform_keys(object, &block)
|
7
|
+
case object
|
8
|
+
when Hash
|
9
|
+
object.each_with_object({}) do |(key, value), result|
|
10
|
+
result[yield(key)] = deep_transform_keys(value, &block)
|
11
|
+
end
|
12
|
+
when Array
|
13
|
+
object.map { |e| deep_transform_keys(e, &block) }
|
14
|
+
else
|
15
|
+
object
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/parsers/exif_parser.rb
CHANGED
@@ -100,17 +100,7 @@ module FormatParser::EXIFParser
|
|
100
100
|
end
|
101
101
|
|
102
102
|
def to_json(*maybe_coder)
|
103
|
-
|
104
|
-
# that come earlier
|
105
|
-
overlay = @multiple_exif_results.each_with_object({}) do |one_exif_frame, h|
|
106
|
-
h.merge!(one_exif_frame.to_hash)
|
107
|
-
end
|
108
|
-
# Overwrite the orientation with our custom method implementation, because
|
109
|
-
# it does reject 0-values.
|
110
|
-
overlay[:orientation] = orientation
|
111
|
-
|
112
|
-
sanitized = FormatParser::AttributesJSON._sanitize_json_value(overlay)
|
113
|
-
sanitized.to_json(*maybe_coder)
|
103
|
+
to_hash.to_json(*maybe_coder)
|
114
104
|
end
|
115
105
|
|
116
106
|
def orientation_sym
|
@@ -135,10 +125,27 @@ module FormatParser::EXIFParser
|
|
135
125
|
0 # If none were found - the orientation is unknown
|
136
126
|
end
|
137
127
|
|
128
|
+
# ActiveSupport will attempt to call #to_hash first, and
|
129
|
+
# #to_hash is a decent default implementation to have
|
130
|
+
def to_hash
|
131
|
+
# Let EXIF tags that come later overwrite the properties from the tags
|
132
|
+
# that come earlier
|
133
|
+
overlay = @multiple_exif_results.each_with_object({}) do |one_exif_frame, h|
|
134
|
+
h.merge!(one_exif_frame.to_hash)
|
135
|
+
end
|
136
|
+
# Overwrite the orientation with our custom method implementation, because
|
137
|
+
# it does reject 0-values.
|
138
|
+
overlay[:orientation] = orientation
|
139
|
+
|
140
|
+
FormatParser::AttributesJSON._sanitize_json_value(overlay)
|
141
|
+
end
|
142
|
+
|
138
143
|
private
|
139
144
|
|
140
|
-
|
141
|
-
|
145
|
+
# respond_to_missing? accepts 2 arguments: the method name symbol
|
146
|
+
# and whether the method being looked up can be private or not
|
147
|
+
def respond_to_missing?(method_name, include_private_methods)
|
148
|
+
@multiple_exif_results.last.respond_to?(method_name, include_private_methods)
|
142
149
|
end
|
143
150
|
|
144
151
|
def method_missing(*a)
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -0,0 +1,127 @@
|
|
1
|
+
|
2
|
+
# MPEG Headers documentation:
|
3
|
+
# http://dvd.sourceforge.net/dvdinfo/mpeghdrs.html#seq
|
4
|
+
# http://www.cs.columbia.edu/~delbert/docs/Dueck%20--%20MPEG-2%20Video%20Transcoding.pdf
|
5
|
+
# Useful tool to check the file information: https://www.metadata2go.com/
|
6
|
+
class FormatParser::MPEGParser
|
7
|
+
extend FormatParser::IOUtils
|
8
|
+
|
9
|
+
ASPECT_RATIOS = {
|
10
|
+
1 => '1:1',
|
11
|
+
2 => '4:3',
|
12
|
+
3 => '16:9',
|
13
|
+
4 => '2.21:1'
|
14
|
+
}
|
15
|
+
|
16
|
+
FRAME_RATES = {
|
17
|
+
1 => '23.976',
|
18
|
+
2 => '24',
|
19
|
+
3 => '25',
|
20
|
+
4 => '29.97',
|
21
|
+
5 => '30',
|
22
|
+
6 => '50',
|
23
|
+
7 => '59.94',
|
24
|
+
8 => '60'
|
25
|
+
}
|
26
|
+
|
27
|
+
PACK_HEADER_START_CODE = [0x00, 0x00, 0x01, 0xBA].pack('C*')
|
28
|
+
SEQUENCE_HEADER_START_CODE = [0xB3].pack('C*')
|
29
|
+
MAX_BLOCK_READS = 32
|
30
|
+
BYTES_TO_READ_PER_READ = 1024
|
31
|
+
|
32
|
+
def self.likely_match?(filename)
|
33
|
+
filename =~ /\.(mpg|mpeg)$/i
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.call(io)
|
37
|
+
return unless matches_mpeg_header?(io)
|
38
|
+
|
39
|
+
# We are looping through the stream because there can be several sequence headers and some of them are not useful.
|
40
|
+
# If we detect that the header is not useful, then we look for the next one, reading at most MAX_BLOCK_READS blocks in total.
|
41
|
+
# If we reach the EOF, then the mpg is likely to be corrupted and we return nil
|
42
|
+
MAX_BLOCK_READS.times do
|
43
|
+
next unless pos = find_next_header_code_pos(io)
|
44
|
+
io.seek(pos + 1)
|
45
|
+
horizontal_size, vertical_size = parse_image_size(io)
|
46
|
+
ratio_code, rate_code = parse_rate_information(io)
|
47
|
+
if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
|
48
|
+
return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
nil # otherwise the return value of Integer#times will be returned
|
52
|
+
rescue FormatParser::IOUtils::InvalidRead
|
53
|
+
nil
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.file_info(width_px, height_px, ratio_code, rate_code)
|
57
|
+
FormatParser::Video.new(
|
58
|
+
format: :mpg,
|
59
|
+
width_px: width_px,
|
60
|
+
height_px: height_px,
|
61
|
+
intrinsics: {
|
62
|
+
aspect_ratio: ASPECT_RATIOS.fetch(ratio_code),
|
63
|
+
frame_rate: FRAME_RATES.fetch(rate_code)
|
64
|
+
},
|
65
|
+
)
|
66
|
+
end
|
67
|
+
|
68
|
+
# The 3 bytes following the sequence header code give us information about the px size
|
69
|
+
# 1.5 bytes (12 bits) for horizontal size and 1.5 bytes for vertical size
|
70
|
+
def self.parse_image_size(io)
|
71
|
+
image_size = convert_3_bytes_to_bits(safe_read(io, 3))
|
72
|
+
[read_first_12_bits(image_size), read_last_12_bits(image_size)]
|
73
|
+
end
|
74
|
+
|
75
|
+
# The following byte gives us information about the aspect ratio and frame rate
|
76
|
+
# 4 bits corresponds to the aspect ratio and 4 bits to the frame rate code
|
77
|
+
def self.parse_rate_information(io)
|
78
|
+
rate_information = safe_read(io, 1).unpack('C').first
|
79
|
+
[read_first_4_bits(rate_information), read_last_4_bits(rate_information)]
|
80
|
+
end
|
81
|
+
|
82
|
+
def self.valid_aspect_ratio_code?(ratio_code)
|
83
|
+
ASPECT_RATIOS.include?(ratio_code)
|
84
|
+
end
|
85
|
+
|
86
|
+
def self.valid_frame_rate_code?(rate_code)
|
87
|
+
FRAME_RATES.include?(rate_code)
|
88
|
+
end
|
89
|
+
|
90
|
+
# Returns the position of the next sequence package content in the stream
|
91
|
+
# Each call reads a single chunk of BYTES_TO_READ_PER_READ bytes from the current position; the caller invokes it up to MAX_BLOCK_READS times
|
92
|
+
# If the package is not found, then it returns nil.
|
93
|
+
def self.find_next_header_code_pos(io)
|
94
|
+
pos_before_read = io.pos
|
95
|
+
bin_str = io.read(BYTES_TO_READ_PER_READ) # bin_str might be nil if we are at EOF
|
96
|
+
header_relative_index = bin_str && bin_str.index(SEQUENCE_HEADER_START_CODE)
|
97
|
+
return pos_before_read + header_relative_index if header_relative_index
|
98
|
+
end
|
99
|
+
|
100
|
+
# If the first 4 bytes of the stream are equal to 00 00 01 BA, the pack start code for the Pack Header, then it's an MPEG file.
|
101
|
+
def self.matches_mpeg_header?(io)
|
102
|
+
safe_read(io, 4) == PACK_HEADER_START_CODE
|
103
|
+
end
|
104
|
+
|
105
|
+
def self.convert_3_bytes_to_bits(bytes)
|
106
|
+
bytes = bytes.unpack('CCC')
|
107
|
+
(bytes[0] << 16) | (bytes[1] << 8) | (bytes[2])
|
108
|
+
end
|
109
|
+
|
110
|
+
def self.read_first_12_bits(bits)
|
111
|
+
bits >> 12 & 0x0fff
|
112
|
+
end
|
113
|
+
|
114
|
+
def self.read_last_12_bits(bits)
|
115
|
+
bits & 0x0fff
|
116
|
+
end
|
117
|
+
|
118
|
+
def self.read_first_4_bits(byte)
|
119
|
+
byte >> 4
|
120
|
+
end
|
121
|
+
|
122
|
+
def self.read_last_4_bits(byte)
|
123
|
+
byte & 0x0F
|
124
|
+
end
|
125
|
+
|
126
|
+
FormatParser.register_parser self, natures: [:video], formats: [:mpg, :mpeg]
|
127
|
+
end
|
@@ -140,4 +140,30 @@ describe FormatParser::AttributesJSON do
|
|
140
140
|
JSON.pretty_generate(object_with_attributes_module)
|
141
141
|
}.to raise_error(/structure too deep/)
|
142
142
|
end
|
143
|
+
|
144
|
+
it 'converts all hash keys to string when stringify_keys: true' do
|
145
|
+
fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
|
146
|
+
fi_io = File.open(fixture_path, 'rb')
|
147
|
+
|
148
|
+
result = FormatParser::ZIPParser.new.call(fi_io).as_json(stringify_keys: true)
|
149
|
+
|
150
|
+
result['entries'].each do |entry|
|
151
|
+
entry.each do |key, _value|
|
152
|
+
expect(key).to be_a(String)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
it 'does not convert hash keys to string when stringify_keys: false' do
|
158
|
+
fixture_path = fixtures_dir + '/ZIP/arch_few_entries.zip'
|
159
|
+
fi_io = File.open(fixture_path, 'rb')
|
160
|
+
|
161
|
+
result = FormatParser::ZIPParser.new.call(fi_io).as_json
|
162
|
+
|
163
|
+
result['entries'].each do |entry|
|
164
|
+
entry.each do |key, _value|
|
165
|
+
expect(key).to be_a(Symbol)
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
143
169
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::HashUtils do
|
4
|
+
describe '.deep_transform_keys' do
|
5
|
+
it 'transforms all the keys in a hash' do
|
6
|
+
hash = { aa: 1, 'bb' => 2 }
|
7
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
8
|
+
|
9
|
+
expect(result).to eq('aa' => 1, 'bb' => 2)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'transforms all the keys in a array of hashes' do
|
13
|
+
array = [{ aa: 1, bb: 2 }, { cc: 3, dd: [{c: 2, d: 3}] }]
|
14
|
+
result = described_class.deep_transform_keys(array, &:to_s)
|
15
|
+
|
16
|
+
expect(result).to eq(
|
17
|
+
[{'aa' => 1, 'bb' => 2}, {'cc' => 3, 'dd' => [{'c' => 2, 'd' => 3}]}]
|
18
|
+
)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'transforms all the keys in a hash recursively' do
|
22
|
+
hash = { aa: 1, bb: { cc: 22, dd: 3 } }
|
23
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
24
|
+
|
25
|
+
expect(result).to eq('aa' => 1, 'bb' => { 'cc' => 22, 'dd' => 3})
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'does nothing for an non array/hash object' do
|
29
|
+
object = Object.new
|
30
|
+
result = described_class.deep_transform_keys(object, &:to_s)
|
31
|
+
|
32
|
+
expect(result).to eq(object)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'returns the last value if different keys are transformed into the same one' do
|
36
|
+
hash = { aa: 0, 'bb' => 2, bb: 1 }
|
37
|
+
result = described_class.deep_transform_keys(hash, &:to_s)
|
38
|
+
|
39
|
+
expect(result).to eq('aa' => 0, 'bb' => 1)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -14,6 +14,34 @@ describe FormatParser::EXIFParser do
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
+
describe 'EXIFStack' do
|
18
|
+
it 'supports respond_to? for methods it does not have' do
|
19
|
+
# Peculiar thing: we need to support respond_to?(:to_hash)
|
20
|
+
# for compatibility with ActiveSupport JSON output. When you call as_json
|
21
|
+
# on an object ActiveSupport implements that as_json method and will then
|
22
|
+
# call #as_json on the contained objects as necessary, _or_ call
|
23
|
+
# other methods if it thinks it is necessary.
|
24
|
+
#
|
25
|
+
# Although we _will_ be implementing to_hash specifically
|
26
|
+
# the respond_to_missing must be implemented correctly
|
27
|
+
stack = FormatParser::EXIFParser::EXIFStack.new([{}, {}])
|
28
|
+
expect(stack).not_to respond_to(:no_such_method__at_all)
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'returns a Hash from #to_hash' do
|
32
|
+
first_fake_exif = double(orientation: 1, to_hash: {foo: 123, bar: 675})
|
33
|
+
second_fake_exif = double(orientation: 4, to_hash: {foo: 245})
|
34
|
+
|
35
|
+
stack = FormatParser::EXIFParser::EXIFStack.new([first_fake_exif, second_fake_exif])
|
36
|
+
stack_as_hash = stack.to_hash
|
37
|
+
|
38
|
+
# In this instance we DO need an actual type_check, because #to_hash
|
39
|
+
# is used by default type coercions in Ruby
|
40
|
+
expect(stack_as_hash).to be_kind_of(Hash)
|
41
|
+
expect(stack_as_hash).to eq(foo: 245, bar: 675, orientation: 4)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
17
45
|
it 'is able to deal with an orientation tag which a tuple value for orientation' do
|
18
46
|
path = fixtures_dir + '/EXIF/double_orientation.exif.bin'
|
19
47
|
exif_data = File.open(path, 'rb') do |f|
|
@@ -110,4 +110,32 @@ describe FormatParser::MP3Parser do
|
|
110
110
|
subject.call(StringIO.new(''))
|
111
111
|
}.to raise_error(FormatParser::IOUtils::InvalidRead)
|
112
112
|
end
|
113
|
+
|
114
|
+
describe '#as_json' do
|
115
|
+
it 'converts all hash keys to string when stringify_keys: true' do
|
116
|
+
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
117
|
+
result = subject.call(File.open(fpath, 'rb')).as_json(stringify_keys: true)
|
118
|
+
|
119
|
+
expect(
|
120
|
+
result['intrinsics'].keys.map(&:class).uniq
|
121
|
+
).to eq([String])
|
122
|
+
|
123
|
+
expect(
|
124
|
+
result['intrinsics']['id3tags'].map(&:class).uniq
|
125
|
+
).to eq([ID3Tag::Tag])
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'does not convert the hash keys to string when stringify_keys: false' do
|
129
|
+
fpath = fixtures_dir + '/MP3/Cassy.mp3'
|
130
|
+
result = subject.call(File.open(fpath, 'rb')).as_json
|
131
|
+
|
132
|
+
expect(
|
133
|
+
result['intrinsics'].keys.map(&:class).uniq
|
134
|
+
).to eq([Symbol])
|
135
|
+
|
136
|
+
expect(
|
137
|
+
result['intrinsics'][:id3tags].map(&:class).uniq
|
138
|
+
).to eq([ID3Tag::Tag])
|
139
|
+
end
|
140
|
+
end
|
113
141
|
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::MPEGParser do
|
4
|
+
it 'parses a first example mpg file' do
|
5
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video1.mpg', 'rb'))
|
6
|
+
|
7
|
+
expect(parse_result.nature).to eq(:video)
|
8
|
+
expect(parse_result.format).to eq(:mpg)
|
9
|
+
expect(parse_result.width_px).to eq(560)
|
10
|
+
expect(parse_result.height_px).to eq(320)
|
11
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('1:1')
|
12
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('30')
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'returns a nil if it is necessary to iterate over a very large number of bytes and the requisite sequences are not detected' do
|
16
|
+
bytes_buffer = StringIO.new
|
17
|
+
bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
|
18
|
+
zero_bytes = [0x00].pack('C') * (1024 * 1024 * 5)
|
19
|
+
bytes_buffer.write(zero_bytes)
|
20
|
+
|
21
|
+
bytes_buffer.rewind
|
22
|
+
|
23
|
+
parse_result = described_class.call(bytes_buffer)
|
24
|
+
expect(parse_result).to be_nil
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'returns a nil if the IO only contains the MPEG header bytes at the start and nothing else' do
|
28
|
+
bytes_buffer = StringIO.new
|
29
|
+
bytes_buffer.write([0x00, 0x00, 0x01, 0xBA].pack('C*')) # MPEG header
|
30
|
+
bytes_buffer.rewind
|
31
|
+
|
32
|
+
parse_result = described_class.call(bytes_buffer)
|
33
|
+
expect(parse_result).to be_nil
|
34
|
+
end
|
35
|
+
|
36
|
+
it 'parses a file with mpeg extension' do
|
37
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video2.mpeg', 'rb'))
|
38
|
+
|
39
|
+
expect(parse_result.nature).to eq(:video)
|
40
|
+
expect(parse_result.format).to eq(:mpg)
|
41
|
+
expect(parse_result.width_px).to eq(720)
|
42
|
+
expect(parse_result.height_px).to eq(480)
|
43
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
|
44
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'parses a second example mpg file' do
|
48
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video3.mpg', 'rb'))
|
49
|
+
|
50
|
+
expect(parse_result.nature).to eq(:video)
|
51
|
+
expect(parse_result.format).to eq(:mpg)
|
52
|
+
expect(parse_result.width_px).to eq(720)
|
53
|
+
expect(parse_result.height_px).to eq(496)
|
54
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
|
55
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'parses a bigger mpg file' do
|
59
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video4.mpg', 'rb'))
|
60
|
+
|
61
|
+
expect(parse_result.nature).to eq(:video)
|
62
|
+
expect(parse_result.format).to eq(:mpg)
|
63
|
+
expect(parse_result.width_px).to eq(1920)
|
64
|
+
expect(parse_result.height_px).to eq(1080)
|
65
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
|
66
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'parses a file with different malformed first sequence header' do
|
70
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video5.mpg', 'rb'))
|
71
|
+
|
72
|
+
expect(parse_result.nature).to eq(:video)
|
73
|
+
expect(parse_result.format).to eq(:mpg)
|
74
|
+
expect(parse_result.width_px).to eq(1440)
|
75
|
+
expect(parse_result.height_px).to eq(1080)
|
76
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
|
77
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('25')
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'parses a MP4 file' do
|
81
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MOOV/MP4/bmff.mp4', 'rb'))
|
82
|
+
|
83
|
+
expect(parse_result).to be_nil
|
84
|
+
end
|
85
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -8,7 +8,6 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
8
|
|
9
9
|
require 'rspec'
|
10
10
|
require 'format_parser'
|
11
|
-
require 'pry'
|
12
11
|
|
13
12
|
module SpecHelpers
|
14
13
|
def fixtures_dir
|
@@ -19,6 +18,8 @@ end
|
|
19
18
|
RSpec.configure do |c|
|
20
19
|
c.include SpecHelpers
|
21
20
|
c.extend SpecHelpers # makes fixtures_dir available for example groups too
|
21
|
+
# https://relishapp.com/rspec/rspec-core/docs/command-line/only-failures
|
22
|
+
c.example_status_persistence_file_path = 'spec/examples.txt'
|
22
23
|
end
|
23
24
|
|
24
25
|
RSpec.shared_examples 'an IO object compatible with IOConstraint' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.22.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-07-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -135,20 +135,6 @@ dependencies:
|
|
135
135
|
- - "~>"
|
136
136
|
- !ruby/object:Gem::Version
|
137
137
|
version: '0.15'
|
138
|
-
- !ruby/object:Gem::Dependency
|
139
|
-
name: pry
|
140
|
-
requirement: !ruby/object:Gem::Requirement
|
141
|
-
requirements:
|
142
|
-
- - "~>"
|
143
|
-
- !ruby/object:Gem::Version
|
144
|
-
version: '0.11'
|
145
|
-
type: :development
|
146
|
-
prerelease: false
|
147
|
-
version_requirements: !ruby/object:Gem::Requirement
|
148
|
-
requirements:
|
149
|
-
- - "~>"
|
150
|
-
- !ruby/object:Gem::Version
|
151
|
-
version: '0.11'
|
152
138
|
- !ruby/object:Gem::Dependency
|
153
139
|
name: yard
|
154
140
|
requirement: !ruby/object:Gem::Requirement
|
@@ -223,6 +209,7 @@ files:
|
|
223
209
|
- lib/document.rb
|
224
210
|
- lib/format_parser.rb
|
225
211
|
- lib/format_parser/version.rb
|
212
|
+
- lib/hash_utils.rb
|
226
213
|
- lib/image.rb
|
227
214
|
- lib/io_constraint.rb
|
228
215
|
- lib/io_utils.rb
|
@@ -240,6 +227,7 @@ files:
|
|
240
227
|
- lib/parsers/moov_parser/decoder.rb
|
241
228
|
- lib/parsers/mp3_parser.rb
|
242
229
|
- lib/parsers/mp3_parser/id3_extraction.rb
|
230
|
+
- lib/parsers/mpeg_parser.rb
|
243
231
|
- lib/parsers/ogg_parser.rb
|
244
232
|
- lib/parsers/pdf_parser.rb
|
245
233
|
- lib/parsers/png_parser.rb
|
@@ -259,6 +247,7 @@ files:
|
|
259
247
|
- spec/file_information_spec.rb
|
260
248
|
- spec/format_parser_inspect_spec.rb
|
261
249
|
- spec/format_parser_spec.rb
|
250
|
+
- spec/hash_utils_spec.rb
|
262
251
|
- spec/io_utils_spec.rb
|
263
252
|
- spec/parsers/aiff_parser_spec.rb
|
264
253
|
- spec/parsers/bmp_parser_spec.rb
|
@@ -271,6 +260,7 @@ files:
|
|
271
260
|
- spec/parsers/jpeg_parser_spec.rb
|
272
261
|
- spec/parsers/moov_parser_spec.rb
|
273
262
|
- spec/parsers/mp3_parser_spec.rb
|
263
|
+
- spec/parsers/mpeg_parser_spec.rb
|
274
264
|
- spec/parsers/ogg_parser_spec.rb
|
275
265
|
- spec/parsers/pdf_parser_spec.rb
|
276
266
|
- spec/parsers/png_parser_spec.rb
|
@@ -285,7 +275,7 @@ files:
|
|
285
275
|
- spec/spec_helper.rb
|
286
276
|
homepage: https://github.com/WeTransfer/format_parser
|
287
277
|
licenses:
|
288
|
-
- MIT
|
278
|
+
- MIT (Hippocratic)
|
289
279
|
metadata:
|
290
280
|
allowed_push_host: https://rubygems.org
|
291
281
|
post_install_message:
|
@@ -303,7 +293,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
303
293
|
- !ruby/object:Gem::Version
|
304
294
|
version: '0'
|
305
295
|
requirements: []
|
306
|
-
rubygems_version: 3.
|
296
|
+
rubygems_version: 3.1.4
|
307
297
|
signing_key:
|
308
298
|
specification_version: 4
|
309
299
|
summary: A library for efficient parsing of file metadata
|