format_parser 0.20.1 → 0.21.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0d10f78d5d1a472fc6af704132ac8b98542a141de3d5b8a998faebfa5a994b30
4
- data.tar.gz: 65636e308c67d8ccc3e3ff592e5cfe02bf5c105ac32b85d17825279211b362db
3
+ metadata.gz: ad2245de4a2119c7572c3962ad14abbf16395b2bec4064b218ee9f99d1e7c24b
4
+ data.tar.gz: b982bcc7f6626b66684db532317b0c0d35cd062aa89766ea1a230f93e7d996d6
5
5
  SHA512:
6
- metadata.gz: bec31abec6687c7b8dd57783da5e2aec175e1f538097b03eba095c49bd636fc38d562b21e3da1b2ca24f74caffb37b9953e139372ce30be19aea502947b532ca
7
- data.tar.gz: 51e593f4c14049467f657e786a2c9d54f91713d1ab45badbdd67d27876e1b61f3542936cb184cb72f0885745673b8423ce4ceda5c0b5cdb4661f96ebf2f6d59e
6
+ metadata.gz: 2df2a3763e12e2bb0c70a8f5ec3319fcf6a3210a73461dc8abb5ec2af706028403eca48cf8c589bd40122dff0cfdabce383a79b1253237e219a5c89936ec0a5e
7
+ data.tar.gz: 0ca6084649313b2c7ad32204b4c8b745f13dbf6cf2c347cb7c80ca7976b964f00feabd9666c0372e9a6c10e4ed250500ed4e202cdb687372cbd369378b7a0faa
data/.gitignore CHANGED
@@ -54,3 +54,7 @@ Gemfile.lock
54
54
 
55
55
  # Used by RuboCop. Remote config files pulled in from inherit_from directive.
56
56
  # .rubocop-https?--*
57
+
58
+
59
+ # OSX Files
60
+ .DS_Store
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.21.0
2
+ * Adds support for MPEG video files
3
+
1
4
  ## 0.20.1
2
5
  * Make sure EXIF results work correctly with ActiveSupport JSON encoders
3
6
 
data/README.md CHANGED
@@ -31,6 +31,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
31
31
  * ZIP
32
32
  * DOCX, PPTX, XLSX
33
33
  * OGG
34
+ * MPEG, MPG
34
35
 
35
36
  ...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
36
37
 
@@ -173,6 +174,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
173
174
  ### .docx
174
175
  - The .docx files were generated by the project maintainers
175
176
 
177
+ ### .mpg and .mpeg
178
+ - The files (video 1 to 4) were downloaded from https://standaloneinstaller.com/blog/big-list-of-sample-videos-for-testers-124.html.
179
+ - Video 5 was downloaded from https://archive.org/details/ligouHDR-HC1_sample1.
180
+
176
181
  ### JPEG examples of EXIF orientation
177
182
  - Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
178
183
  manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.20.1'
2
+ VERSION = '0.21.0'
3
3
  end
@@ -0,0 +1,131 @@
1
+
2
+ # MPEG Headers documentation:
3
+ # http://dvd.sourceforge.net/dvdinfo/mpeghdrs.html#seq
4
+ # http://www.cs.columbia.edu/~delbert/docs/Dueck%20--%20MPEG-2%20Video%20Transcoding.pdf
5
+ # Useful tool to check the file information: https://www.metadata2go.com/
6
+ class FormatParser::MPEGParser
7
+ extend FormatParser::IOUtils
8
+
9
+ ASPECT_RATIOS = {
10
+ 1 => '1:1',
11
+ 2 => '4:3',
12
+ 3 => '16:9',
13
+ 4 => '2.21:1'
14
+ }
15
+
16
+ FRAME_RATES = {
17
+ 1 => '23.976',
18
+ 2 => '24',
19
+ 3 => '25',
20
+ 4 => '29.97',
21
+ 5 => '30',
22
+ 6 => '50',
23
+ 7 => '59.94',
24
+ 8 => '60'
25
+ }
26
+
27
+ PACK_HEADER_START_CODE = [0x00, 0x00, 0x01, 0xBA].pack('C*')
28
+ SEQUENCE_HEADER_START_CODE = [0xB3].pack('C*')
29
+ SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT = 4
30
+ SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT = 4
31
+ BYTES_TO_READ_PER_TIME = 1024
32
+
33
+ def self.likely_match?(filename)
34
+ filename =~ /\.(mpg|mpeg)$/i
35
+ end
36
+
37
+ def self.call(io)
38
+ return unless matches_mpeg_header?(io)
39
+
40
+ # We are looping though the stream because there can be several sequence headers and some of them are not usefull.
41
+ # If we detect that the header is not usefull, then we look for the next one for SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT
42
+ # If we reach the EOF, then the mpg is likely to be corrupted and we return nil
43
+ SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT.times do
44
+ return if fetch_next_sequence_header_code_position(io).nil?
45
+ horizontal_size, vertical_size = parse_image_size(io)
46
+ ratio_code, rate_code = parse_rate_information(io)
47
+
48
+ if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
49
+ return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
50
+ end
51
+ end
52
+ rescue FormatParser::IOUtils::InvalidRead
53
+ nil
54
+ end
55
+
56
+ def self.file_info(width_px, height_px, ratio_code, rate_code)
57
+ FormatParser::Video.new(
58
+ format: :mpg,
59
+ width_px: width_px,
60
+ height_px: height_px,
61
+ intrinsics: {
62
+ aspect_ratio: ASPECT_RATIOS.fetch(ratio_code),
63
+ frame_rate: FRAME_RATES.fetch(rate_code)
64
+ },
65
+ )
66
+ end
67
+
68
+ # The following 3 bytes after the sequence header code, gives us information about the px size
69
+ # 1.5 bytes (12 bits) for horizontal size and 1.5 bytes for vertical size
70
+ def self.parse_image_size(io)
71
+ image_size = convert_3_bytes_to_bits(safe_read(io, 3))
72
+ [read_first_12_bits(image_size), read_last_12_bits(image_size)]
73
+ end
74
+
75
+ # The following byte gives us information about the aspect ratio and frame rate
76
+ # 4 bits corresponds to the aspect ratio and 4 bits to the frame rate code
77
+ def self.parse_rate_information(io)
78
+ rate_information = safe_read(io, 1).unpack('C').first
79
+ [read_first_4_bits(rate_information), read_last_4_bits(rate_information)]
80
+ end
81
+
82
+ def self.valid_aspect_ratio_code?(ratio_code)
83
+ ASPECT_RATIOS.include?(ratio_code)
84
+ end
85
+
86
+ def self.valid_frame_rate_code?(rate_code)
87
+ FRAME_RATES.include?(rate_code)
88
+ end
89
+
90
+ # Returns the position of the next sequence package content in the stream
91
+ # This method will read BYTES_TO_READ_PER_TIME in each loop for a maximum amount of SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT times
92
+ # If the package is not found, then it returns nil.
93
+ def self.fetch_next_sequence_header_code_position(io)
94
+ SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT.times do
95
+ bytes_stream_read = io.read(BYTES_TO_READ_PER_TIME)
96
+ header_relative_index = bytes_stream_read.index(SEQUENCE_HEADER_START_CODE)
97
+ next if header_relative_index.nil?
98
+ new_io_pos = io.pos - BYTES_TO_READ_PER_TIME + header_relative_index + 1
99
+ io.seek(new_io_pos)
100
+ return new_io_pos
101
+ end
102
+ end
103
+
104
+ # If the first 4 bytes of the stream are equal to 00 00 01 BA, the pack start code for the Pack Header, then it's an MPEG file.
105
+ def self.matches_mpeg_header?(io)
106
+ safe_read(io, 4) == PACK_HEADER_START_CODE
107
+ end
108
+
109
+ def self.convert_3_bytes_to_bits(bytes)
110
+ bytes = bytes.unpack('CCC')
111
+ (bytes[0] << 16) | (bytes[1] << 8) | (bytes[2])
112
+ end
113
+
114
+ def self.read_first_12_bits(bits)
115
+ bits >> 12 & 0x0fff
116
+ end
117
+
118
+ def self.read_last_12_bits(bits)
119
+ bits & 0x0fff
120
+ end
121
+
122
+ def self.read_first_4_bits(byte)
123
+ byte >> 4
124
+ end
125
+
126
+ def self.read_last_4_bits(byte)
127
+ byte & 0x0F
128
+ end
129
+
130
+ FormatParser.register_parser self, natures: [:video], formats: [:mpg, :mpeg]
131
+ end
@@ -0,0 +1,64 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::MPEGParser do
4
+ it 'parses a first example mpg file' do
5
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video1.mpg', 'rb'))
6
+
7
+ expect(parse_result.nature).to eq(:video)
8
+ expect(parse_result.format).to eq(:mpg)
9
+ expect(parse_result.width_px).to eq(560)
10
+ expect(parse_result.height_px).to eq(320)
11
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('1:1')
12
+ expect(parse_result.intrinsics[:frame_rate]).to eq('30')
13
+ end
14
+
15
+ it 'parses a file with mpeg extension' do
16
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video2.mpeg', 'rb'))
17
+
18
+ expect(parse_result.nature).to eq(:video)
19
+ expect(parse_result.format).to eq(:mpg)
20
+ expect(parse_result.width_px).to eq(720)
21
+ expect(parse_result.height_px).to eq(480)
22
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
23
+ expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
24
+ end
25
+
26
+ it 'parses a second example mpg file' do
27
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video3.mpg', 'rb'))
28
+
29
+ expect(parse_result.nature).to eq(:video)
30
+ expect(parse_result.format).to eq(:mpg)
31
+ expect(parse_result.width_px).to eq(720)
32
+ expect(parse_result.height_px).to eq(496)
33
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
34
+ expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
35
+ end
36
+
37
+ it 'parses a bigger mpg file' do
38
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video4.mpg', 'rb'))
39
+
40
+ expect(parse_result.nature).to eq(:video)
41
+ expect(parse_result.format).to eq(:mpg)
42
+ expect(parse_result.width_px).to eq(1920)
43
+ expect(parse_result.height_px).to eq(1080)
44
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
45
+ expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
46
+ end
47
+
48
+ it 'parses a file with different malformed first sequence header' do
49
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video5.mpg', 'rb'))
50
+
51
+ expect(parse_result.nature).to eq(:video)
52
+ expect(parse_result.format).to eq(:mpg)
53
+ expect(parse_result.width_px).to eq(1440)
54
+ expect(parse_result.height_px).to eq(1080)
55
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
56
+ expect(parse_result.intrinsics[:frame_rate]).to eq('25')
57
+ end
58
+
59
+ it 'parses a MP4 file' do
60
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MOOV/MP4/bmff.mp4', 'rb'))
61
+
62
+ expect(parse_result).to be_nil
63
+ end
64
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.1
4
+ version: 0.21.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2020-01-05 00:00:00.000000000 Z
12
+ date: 2020-03-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -240,6 +240,7 @@ files:
240
240
  - lib/parsers/moov_parser/decoder.rb
241
241
  - lib/parsers/mp3_parser.rb
242
242
  - lib/parsers/mp3_parser/id3_extraction.rb
243
+ - lib/parsers/mpeg_parser.rb
243
244
  - lib/parsers/ogg_parser.rb
244
245
  - lib/parsers/pdf_parser.rb
245
246
  - lib/parsers/png_parser.rb
@@ -271,6 +272,7 @@ files:
271
272
  - spec/parsers/jpeg_parser_spec.rb
272
273
  - spec/parsers/moov_parser_spec.rb
273
274
  - spec/parsers/mp3_parser_spec.rb
275
+ - spec/parsers/mpeg_parser_spec.rb
274
276
  - spec/parsers/ogg_parser_spec.rb
275
277
  - spec/parsers/pdf_parser_spec.rb
276
278
  - spec/parsers/png_parser_spec.rb
@@ -303,7 +305,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
303
305
  - !ruby/object:Gem::Version
304
306
  version: '0'
305
307
  requirements: []
306
- rubygems_version: 3.0.6
308
+ rubygems_version: 3.0.3
307
309
  signing_key:
308
310
  specification_version: 4
309
311
  summary: A library for efficient parsing of file metadata