format_parser 0.20.1 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/CHANGELOG.md +3 -0
- data/README.md +5 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/mpeg_parser.rb +131 -0
- data/spec/parsers/mpeg_parser_spec.rb +64 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad2245de4a2119c7572c3962ad14abbf16395b2bec4064b218ee9f99d1e7c24b
|
4
|
+
data.tar.gz: b982bcc7f6626b66684db532317b0c0d35cd062aa89766ea1a230f93e7d996d6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2df2a3763e12e2bb0c70a8f5ec3319fcf6a3210a73461dc8abb5ec2af706028403eca48cf8c589bd40122dff0cfdabce383a79b1253237e219a5c89936ec0a5e
|
7
|
+
data.tar.gz: 0ca6084649313b2c7ad32204b4c8b745f13dbf6cf2c347cb7c80ca7976b964f00feabd9666c0372e9a6c10e4ed250500ed4e202cdb687372cbd369378b7a0faa
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -31,6 +31,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
31
31
|
* ZIP
|
32
32
|
* DOCX, PPTX, XLSX
|
33
33
|
* OGG
|
34
|
+
* MPEG, MPG
|
34
35
|
|
35
36
|
...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
|
36
37
|
|
@@ -173,6 +174,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
173
174
|
### .docx
|
174
175
|
- The .docx files were generated by the project maintainers
|
175
176
|
|
177
|
+
### .mpg and .mpeg
|
178
|
+
- The files (video 1 to 4) were downloaded from https://standaloneinstaller.com/blog/big-list-of-sample-videos-for-testers-124.html.
|
179
|
+
- Video 5 was downloaded from https://archive.org/details/ligouHDR-HC1_sample1.
|
180
|
+
|
176
181
|
### JPEG examples of EXIF orientation
|
177
182
|
- Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
|
178
183
|
manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
|
@@ -0,0 +1,131 @@
|
|
1
|
+
|
2
|
+
# MPEG Headers documentation:
|
3
|
+
# http://dvd.sourceforge.net/dvdinfo/mpeghdrs.html#seq
|
4
|
+
# http://www.cs.columbia.edu/~delbert/docs/Dueck%20--%20MPEG-2%20Video%20Transcoding.pdf
|
5
|
+
# Useful tool to check the file information: https://www.metadata2go.com/
|
6
|
+
class FormatParser::MPEGParser
|
7
|
+
extend FormatParser::IOUtils
|
8
|
+
|
9
|
+
ASPECT_RATIOS = {
|
10
|
+
1 => '1:1',
|
11
|
+
2 => '4:3',
|
12
|
+
3 => '16:9',
|
13
|
+
4 => '2.21:1'
|
14
|
+
}
|
15
|
+
|
16
|
+
FRAME_RATES = {
|
17
|
+
1 => '23.976',
|
18
|
+
2 => '24',
|
19
|
+
3 => '25',
|
20
|
+
4 => '29.97',
|
21
|
+
5 => '30',
|
22
|
+
6 => '50',
|
23
|
+
7 => '59.94',
|
24
|
+
8 => '60'
|
25
|
+
}
|
26
|
+
|
27
|
+
PACK_HEADER_START_CODE = [0x00, 0x00, 0x01, 0xBA].pack('C*')
|
28
|
+
SEQUENCE_HEADER_START_CODE = [0xB3].pack('C*')
|
29
|
+
SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT = 4
|
30
|
+
SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT = 4
|
31
|
+
BYTES_TO_READ_PER_TIME = 1024
|
32
|
+
|
33
|
+
# Reports whether the given filename carries an MPEG extension.
#
# @param filename [String] the file name to inspect
# @return [Integer, nil] index at which the extension starts, or nil when the
#   name does not end in .mpg / .mpeg (case-insensitive)
def self.likely_match?(filename)
  # \z anchors at the real end of the string. `$` also matches right before an
  # embedded newline, so a name such as "clip.mpg\nnotes.txt" would slip through.
  filename =~ /\.(mpg|mpeg)\z/i
end
|
36
|
+
|
37
|
+
# Tries to extract video metadata from an MPEG program stream.
#
# @param io the stream to parse; it is handed to the reading helpers of this class
# @return the value built by file_info for the first sequence header whose
#   aspect ratio and frame rate codes are both valid, or nil when the stream
#   does not start with the pack header, no sequence header can be located,
#   no valid header is found within the attempt limit, or a read fails
def self.call(io)
  return unless matches_mpeg_header?(io)

  # We are looping through the stream because there can be several sequence
  # headers and some of them are not useful. If a header turns out not to be
  # useful, we look for the next one, up to SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT
  # times. If no further header can be located, the mpg is likely to be
  # corrupted and we return nil.
  SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT.times do
    return if fetch_next_sequence_header_code_position(io).nil?

    horizontal_size, vertical_size = parse_image_size(io)
    ratio_code, rate_code = parse_rate_information(io)

    if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
      return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
    end
  end

  # Integer#times evaluates to its receiver, so without this explicit nil the
  # method would return the attempt limit when every header was rejected.
  nil
rescue FormatParser::IOUtils::InvalidRead
  nil
end
|
55
|
+
|
56
|
+
def self.file_info(width_px, height_px, ratio_code, rate_code)
|
57
|
+
FormatParser::Video.new(
|
58
|
+
format: :mpg,
|
59
|
+
width_px: width_px,
|
60
|
+
height_px: height_px,
|
61
|
+
intrinsics: {
|
62
|
+
aspect_ratio: ASPECT_RATIOS.fetch(ratio_code),
|
63
|
+
frame_rate: FRAME_RATES.fetch(rate_code)
|
64
|
+
},
|
65
|
+
)
|
66
|
+
end
|
67
|
+
|
68
|
+
# The 3 bytes following the sequence header code give us information about the px size
|
69
|
+
# 1.5 bytes (12 bits) for horizontal size and 1.5 bytes for vertical size
|
70
|
+
def self.parse_image_size(io)
|
71
|
+
image_size = convert_3_bytes_to_bits(safe_read(io, 3))
|
72
|
+
[read_first_12_bits(image_size), read_last_12_bits(image_size)]
|
73
|
+
end
|
74
|
+
|
75
|
+
# The following byte gives us information about the aspect ratio and frame rate
|
76
|
+
# 4 bits correspond to the aspect ratio and 4 bits to the frame rate code
|
77
|
+
def self.parse_rate_information(io)
|
78
|
+
rate_information = safe_read(io, 1).unpack('C').first
|
79
|
+
[read_first_4_bits(rate_information), read_last_4_bits(rate_information)]
|
80
|
+
end
|
81
|
+
|
82
|
+
def self.valid_aspect_ratio_code?(ratio_code)
|
83
|
+
ASPECT_RATIOS.include?(ratio_code)
|
84
|
+
end
|
85
|
+
|
86
|
+
def self.valid_frame_rate_code?(rate_code)
|
87
|
+
FRAME_RATES.include?(rate_code)
|
88
|
+
end
|
89
|
+
|
90
|
+
# Locates the next sequence header start code and positions the stream right
# after it, i.e. on the first byte of the sequence header content.
#
# Reads up to BYTES_TO_READ_PER_TIME bytes per attempt, for at most
# SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT attempts.
#
# @param io the stream to scan; must respond to pos, read and seek
# @return [Integer, nil] the absolute position following the start code, or
#   nil when the stream ends or the attempt limit is reached without a match
def self.fetch_next_sequence_header_code_position(io)
  SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT.times do
    # Remember where this chunk begins: the final read of a stream may return
    # fewer bytes than requested, so the start cannot be derived from io.pos
    # and the requested size afterwards.
    chunk_start = io.pos
    chunk = io.read(BYTES_TO_READ_PER_TIME)

    # A sized read returns nil at EOF; there is nothing left to scan.
    return nil if chunk.nil? || chunk.empty?

    header_relative_index = chunk.index(SEQUENCE_HEADER_START_CODE)
    next if header_relative_index.nil?

    # + 1 skips the start code byte itself.
    new_io_pos = chunk_start + header_relative_index + 1
    io.seek(new_io_pos)
    return new_io_pos
  end

  # Integer#times evaluates to its receiver; return nil explicitly so callers
  # can detect that no header was found.
  nil
end
|
103
|
+
|
104
|
+
# If the first 4 bytes of the stream are equal to 00 00 01 BA, the pack start code for the Pack Header, then it's an MPEG file.
|
105
|
+
def self.matches_mpeg_header?(io)
|
106
|
+
safe_read(io, 4) == PACK_HEADER_START_CODE
|
107
|
+
end
|
108
|
+
|
109
|
+
def self.convert_3_bytes_to_bits(bytes)
|
110
|
+
bytes = bytes.unpack('CCC')
|
111
|
+
(bytes[0] << 16) | (bytes[1] << 8) | (bytes[2])
|
112
|
+
end
|
113
|
+
|
114
|
+
def self.read_first_12_bits(bits)
|
115
|
+
bits >> 12 & 0x0fff
|
116
|
+
end
|
117
|
+
|
118
|
+
def self.read_last_12_bits(bits)
|
119
|
+
bits & 0x0fff
|
120
|
+
end
|
121
|
+
|
122
|
+
def self.read_first_4_bits(byte)
|
123
|
+
byte >> 4
|
124
|
+
end
|
125
|
+
|
126
|
+
def self.read_last_4_bits(byte)
|
127
|
+
byte & 0x0F
|
128
|
+
end
|
129
|
+
|
130
|
+
FormatParser.register_parser self, natures: [:video], formats: [:mpg, :mpeg]
|
131
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::MPEGParser do
|
4
|
+
it 'parses a first example mpg file' do
|
5
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video1.mpg', 'rb'))
|
6
|
+
|
7
|
+
expect(parse_result.nature).to eq(:video)
|
8
|
+
expect(parse_result.format).to eq(:mpg)
|
9
|
+
expect(parse_result.width_px).to eq(560)
|
10
|
+
expect(parse_result.height_px).to eq(320)
|
11
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('1:1')
|
12
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('30')
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'parses a file with mpeg extension' do
|
16
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video2.mpeg', 'rb'))
|
17
|
+
|
18
|
+
expect(parse_result.nature).to eq(:video)
|
19
|
+
expect(parse_result.format).to eq(:mpg)
|
20
|
+
expect(parse_result.width_px).to eq(720)
|
21
|
+
expect(parse_result.height_px).to eq(480)
|
22
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
|
23
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'parses a second example mpg file' do
|
27
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video3.mpg', 'rb'))
|
28
|
+
|
29
|
+
expect(parse_result.nature).to eq(:video)
|
30
|
+
expect(parse_result.format).to eq(:mpg)
|
31
|
+
expect(parse_result.width_px).to eq(720)
|
32
|
+
expect(parse_result.height_px).to eq(496)
|
33
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
|
34
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'parses a bigger mpg file' do
|
38
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video4.mpg', 'rb'))
|
39
|
+
|
40
|
+
expect(parse_result.nature).to eq(:video)
|
41
|
+
expect(parse_result.format).to eq(:mpg)
|
42
|
+
expect(parse_result.width_px).to eq(1920)
|
43
|
+
expect(parse_result.height_px).to eq(1080)
|
44
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
|
45
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'parses a file with different malformed first sequence header' do
|
49
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video5.mpg', 'rb'))
|
50
|
+
|
51
|
+
expect(parse_result.nature).to eq(:video)
|
52
|
+
expect(parse_result.format).to eq(:mpg)
|
53
|
+
expect(parse_result.width_px).to eq(1440)
|
54
|
+
expect(parse_result.height_px).to eq(1080)
|
55
|
+
expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
|
56
|
+
expect(parse_result.intrinsics[:frame_rate]).to eq('25')
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'parses a MP4 file' do
|
60
|
+
parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MOOV/MP4/bmff.mp4', 'rb'))
|
61
|
+
|
62
|
+
expect(parse_result).to be_nil
|
63
|
+
end
|
64
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.21.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2020-
|
12
|
+
date: 2020-03-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -240,6 +240,7 @@ files:
|
|
240
240
|
- lib/parsers/moov_parser/decoder.rb
|
241
241
|
- lib/parsers/mp3_parser.rb
|
242
242
|
- lib/parsers/mp3_parser/id3_extraction.rb
|
243
|
+
- lib/parsers/mpeg_parser.rb
|
243
244
|
- lib/parsers/ogg_parser.rb
|
244
245
|
- lib/parsers/pdf_parser.rb
|
245
246
|
- lib/parsers/png_parser.rb
|
@@ -271,6 +272,7 @@ files:
|
|
271
272
|
- spec/parsers/jpeg_parser_spec.rb
|
272
273
|
- spec/parsers/moov_parser_spec.rb
|
273
274
|
- spec/parsers/mp3_parser_spec.rb
|
275
|
+
- spec/parsers/mpeg_parser_spec.rb
|
274
276
|
- spec/parsers/ogg_parser_spec.rb
|
275
277
|
- spec/parsers/pdf_parser_spec.rb
|
276
278
|
- spec/parsers/png_parser_spec.rb
|
@@ -303,7 +305,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
303
305
|
- !ruby/object:Gem::Version
|
304
306
|
version: '0'
|
305
307
|
requirements: []
|
306
|
-
rubygems_version: 3.0.
|
308
|
+
rubygems_version: 3.0.3
|
307
309
|
signing_key:
|
308
310
|
specification_version: 4
|
309
311
|
summary: A library for efficient parsing of file metadata
|