format_parser 0.20.1 → 0.21.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/CHANGELOG.md +3 -0
- data/README.md +5 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/mpeg_parser.rb +131 -0
- data/spec/parsers/mpeg_parser_spec.rb +64 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad2245de4a2119c7572c3962ad14abbf16395b2bec4064b218ee9f99d1e7c24b
|
4
|
+
data.tar.gz: b982bcc7f6626b66684db532317b0c0d35cd062aa89766ea1a230f93e7d996d6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2df2a3763e12e2bb0c70a8f5ec3319fcf6a3210a73461dc8abb5ec2af706028403eca48cf8c589bd40122dff0cfdabce383a79b1253237e219a5c89936ec0a5e
|
7
|
+
data.tar.gz: 0ca6084649313b2c7ad32204b4c8b745f13dbf6cf2c347cb7c80ca7976b964f00feabd9666c0372e9a6c10e4ed250500ed4e202cdb687372cbd369378b7a0faa
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -31,6 +31,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
31
31
|
* ZIP
|
32
32
|
* DOCX, PPTX, XLSX
|
33
33
|
* OGG
|
34
|
+
* MPEG, MPG
|
34
35
|
|
35
36
|
...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
|
36
37
|
|
@@ -173,6 +174,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
173
174
|
### .docx
|
174
175
|
- The .docx files were generated by the project maintainers
|
175
176
|
|
177
|
+
### .mpg and .mpeg
|
178
|
+
- The files (video 1 to 4) were downloaded from https://standaloneinstaller.com/blog/big-list-of-sample-videos-for-testers-124.html.
|
179
|
+
- Video 5 was downloaded from https://archive.org/details/ligouHDR-HC1_sample1.
|
180
|
+
|
176
181
|
### JPEG examples of EXIF orientation
|
177
182
|
- Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
|
178
183
|
manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
|
@@ -0,0 +1,131 @@
|
|
1
|
+
|
2
|
+
# MPEG Headers documentation:
|
3
|
+
# http://dvd.sourceforge.net/dvdinfo/mpeghdrs.html#seq
|
4
|
+
# http://www.cs.columbia.edu/~delbert/docs/Dueck%20--%20MPEG-2%20Video%20Transcoding.pdf
|
5
|
+
# Useful tool to check the file information: https://www.metadata2go.com/
|
6
|
+
# Parser for MPEG program streams (.mpg / .mpeg).
#
# A file is recognised by the pack header start code at the very beginning of
# the stream. The parser then scans forward for a sequence header and decodes
# the frame size, the aspect ratio code and the frame rate code from it.
class FormatParser::MPEGParser
  extend FormatParser::IOUtils

  # Aspect ratio code (high 4 bits of the byte following the image size)
  # mapped to its human readable form.
  ASPECT_RATIOS = {
    1 => '1:1',
    2 => '4:3',
    3 => '16:9',
    4 => '2.21:1'
  }.freeze

  # Frame rate code (low 4 bits of the same byte) mapped to frames per second.
  FRAME_RATES = {
    1 => '23.976',
    2 => '24',
    3 => '25',
    4 => '29.97',
    5 => '30',
    6 => '50',
    7 => '59.94',
    8 => '60'
  }.freeze

  PACK_HEADER_START_CODE = [0x00, 0x00, 0x01, 0xBA].pack('C*').freeze
  # Only the last byte of the sequence header start code (00 00 01 B3) is
  # searched for. A stray 0xB3 byte can therefore produce a false candidate;
  # such candidates are rejected in .call by validating the decoded codes.
  SEQUENCE_HEADER_START_CODE = [0xB3].pack('C*').freeze
  # How many candidate sequence headers .call inspects before giving up.
  SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT = 4
  # How many chunks are read while looking for a single candidate.
  SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT = 4
  BYTES_TO_READ_PER_TIME = 1024

  # Tells whether the filename has an MPEG extension (case-insensitive).
  #
  # @param filename [String]
  # @return [Integer, nil] truthy when the extension is .mpg or .mpeg
  def self.likely_match?(filename)
    # \z anchors at the real end of the string; `$` would also match before
    # a trailing newline or at the end of any earlier line.
    filename =~ /\.(mpg|mpeg)\z/i
  end

  # Parses the stream and returns the video information, or nil when the
  # stream is not an MPEG file or no usable sequence header could be found.
  #
  # @param io [#read, #seek, #pos] stream positioned at the start of the file
  # @return [FormatParser::Video, nil]
  def self.call(io)
    return unless matches_mpeg_header?(io)

    # We are looping through the stream because there can be several sequence
    # headers and some of them are not useful. If the header we found is not
    # useful we look for the next one, at most
    # SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT times. If we run out of data the
    # file is likely to be corrupted and we return nil.
    SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT.times do
      return if fetch_next_sequence_header_code_position(io).nil?
      horizontal_size, vertical_size = parse_image_size(io)
      ratio_code, rate_code = parse_rate_information(io)

      if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
        return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
      end
    end

    # Without this the method would return the result of Integer#times
    # (the receiver, i.e. an Integer) instead of nil.
    nil
  rescue FormatParser::IOUtils::InvalidRead
    nil
  end

  # Builds the parse result from the decoded sequence header values.
  #
  # @param width_px [Integer]
  # @param height_px [Integer]
  # @param ratio_code [Integer] a key of ASPECT_RATIOS
  # @param rate_code [Integer] a key of FRAME_RATES
  # @return [FormatParser::Video]
  # @raise [KeyError] when either code is not a known key
  def self.file_info(width_px, height_px, ratio_code, rate_code)
    FormatParser::Video.new(
      format: :mpg,
      width_px: width_px,
      height_px: height_px,
      intrinsics: {
        aspect_ratio: ASPECT_RATIOS.fetch(ratio_code),
        frame_rate: FRAME_RATES.fetch(rate_code)
      },
    )
  end

  # The 3 bytes following the sequence header code carry the frame size:
  # 1.5 bytes (12 bits) for the horizontal size and 1.5 bytes for the vertical size.
  #
  # @return [Array(Integer, Integer)] horizontal and vertical size
  def self.parse_image_size(io)
    image_size = convert_3_bytes_to_bits(safe_read(io, 3))
    [read_first_12_bits(image_size), read_last_12_bits(image_size)]
  end

  # The next byte carries the aspect ratio code (high 4 bits) and the frame
  # rate code (low 4 bits).
  #
  # @return [Array(Integer, Integer)] aspect ratio code and frame rate code
  def self.parse_rate_information(io)
    rate_information = safe_read(io, 1).unpack('C').first
    [read_first_4_bits(rate_information), read_last_4_bits(rate_information)]
  end

  def self.valid_aspect_ratio_code?(ratio_code)
    ASPECT_RATIOS.include?(ratio_code)
  end

  def self.valid_frame_rate_code?(rate_code)
    FRAME_RATES.include?(rate_code)
  end

  # Finds the next sequence header candidate and leaves the stream positioned
  # on the byte right after its start code.
  #
  # Reads BYTES_TO_READ_PER_TIME bytes per attempt, for at most
  # SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT attempts.
  #
  # @return [Integer, nil] the new stream position, or nil when the start code
  #   was not found within the limit or the stream ended
  def self.fetch_next_sequence_header_code_position(io)
    SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT.times do
      # Remember where the chunk starts: the final chunk of a stream may be
      # shorter than BYTES_TO_READ_PER_TIME, so the offset cannot be derived
      # from the position after the read.
      chunk_start = io.pos
      bytes_stream_read = io.read(BYTES_TO_READ_PER_TIME)
      # IO#read with a length returns nil at EOF - nothing left to scan.
      return nil if bytes_stream_read.nil? || bytes_stream_read.empty?

      header_relative_index = bytes_stream_read.index(SEQUENCE_HEADER_START_CODE)
      next if header_relative_index.nil?

      new_io_pos = chunk_start + header_relative_index + 1
      io.seek(new_io_pos)
      return new_io_pos
    end

    # Integer#times returns its receiver; make the "not found" result explicit.
    nil
  end

  # If the first 4 bytes of the stream are equal to 00 00 01 BA, the pack
  # start code of the Pack Header, then it is an MPEG file.
  def self.matches_mpeg_header?(io)
    safe_read(io, 4) == PACK_HEADER_START_CODE
  end

  # Combines 3 bytes (big-endian) into a single 24-bit Integer.
  def self.convert_3_bytes_to_bits(bytes)
    bytes = bytes.unpack('CCC')
    (bytes[0] << 16) | (bytes[1] << 8) | (bytes[2])
  end

  # Upper 12 bits of a 24-bit value.
  def self.read_first_12_bits(bits)
    (bits >> 12) & 0x0fff
  end

  # Lower 12 bits of a 24-bit value.
  def self.read_last_12_bits(bits)
    bits & 0x0fff
  end

  # Upper 4 bits of a byte.
  def self.read_first_4_bits(byte)
    byte >> 4
  end

  # Lower 4 bits of a byte.
  def self.read_last_4_bits(byte)
    byte & 0x0F
  end

  FormatParser.register_parser self, natures: [:video], formats: [:mpg, :mpeg]
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::MPEGParser do
  # Opens the fixture located at the given path (relative to this spec's
  # directory) in binary mode and runs the parser on it.
  def parse_fixture(relative_path)
    described_class.call(File.open(__dir__ + relative_path, 'rb'))
  end

  # Checks that the result describes an MPG video with the given properties.
  def expect_mpg_video(result, width_px:, height_px:, aspect_ratio:, frame_rate:)
    expect(result.nature).to eq(:video)
    expect(result.format).to eq(:mpg)
    expect(result.width_px).to eq(width_px)
    expect(result.height_px).to eq(height_px)
    expect(result.intrinsics[:aspect_ratio]).to eq(aspect_ratio)
    expect(result.intrinsics[:frame_rate]).to eq(frame_rate)
  end

  it 'parses a first example mpg file' do
    expect_mpg_video(
      parse_fixture('/../fixtures/MPG/video1.mpg'),
      width_px: 560, height_px: 320, aspect_ratio: '1:1', frame_rate: '30'
    )
  end

  it 'parses a file with mpeg extension' do
    expect_mpg_video(
      parse_fixture('/../fixtures/MPG/video2.mpeg'),
      width_px: 720, height_px: 480, aspect_ratio: '4:3', frame_rate: '29.97'
    )
  end

  it 'parses a second example mpg file' do
    expect_mpg_video(
      parse_fixture('/../fixtures/MPG/video3.mpg'),
      width_px: 720, height_px: 496, aspect_ratio: '4:3', frame_rate: '29.97'
    )
  end

  it 'parses a bigger mpg file' do
    expect_mpg_video(
      parse_fixture('/../fixtures/MPG/video4.mpg'),
      width_px: 1920, height_px: 1080, aspect_ratio: '16:9', frame_rate: '29.97'
    )
  end

  it 'parses a file with different malformed first sequence header' do
    expect_mpg_video(
      parse_fixture('/../fixtures/MPG/video5.mpg'),
      width_px: 1440, height_px: 1080, aspect_ratio: '16:9', frame_rate: '25'
    )
  end

  # A non-MPEG container must be rejected by the parser.
  it 'parses a MP4 file' do
    expect(parse_fixture('/../fixtures/MOOV/MP4/bmff.mp4')).to be_nil
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.21.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2020-
|
12
|
+
date: 2020-03-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -240,6 +240,7 @@ files:
|
|
240
240
|
- lib/parsers/moov_parser/decoder.rb
|
241
241
|
- lib/parsers/mp3_parser.rb
|
242
242
|
- lib/parsers/mp3_parser/id3_extraction.rb
|
243
|
+
- lib/parsers/mpeg_parser.rb
|
243
244
|
- lib/parsers/ogg_parser.rb
|
244
245
|
- lib/parsers/pdf_parser.rb
|
245
246
|
- lib/parsers/png_parser.rb
|
@@ -271,6 +272,7 @@ files:
|
|
271
272
|
- spec/parsers/jpeg_parser_spec.rb
|
272
273
|
- spec/parsers/moov_parser_spec.rb
|
273
274
|
- spec/parsers/mp3_parser_spec.rb
|
275
|
+
- spec/parsers/mpeg_parser_spec.rb
|
274
276
|
- spec/parsers/ogg_parser_spec.rb
|
275
277
|
- spec/parsers/pdf_parser_spec.rb
|
276
278
|
- spec/parsers/png_parser_spec.rb
|
@@ -303,7 +305,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
303
305
|
- !ruby/object:Gem::Version
|
304
306
|
version: '0'
|
305
307
|
requirements: []
|
306
|
-
rubygems_version: 3.0.
|
308
|
+
rubygems_version: 3.0.3
|
307
309
|
signing_key:
|
308
310
|
specification_version: 4
|
309
311
|
summary: A library for efficient parsing of file metadata
|