format_parser 0.20.1 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0d10f78d5d1a472fc6af704132ac8b98542a141de3d5b8a998faebfa5a994b30
4
- data.tar.gz: 65636e308c67d8ccc3e3ff592e5cfe02bf5c105ac32b85d17825279211b362db
3
+ metadata.gz: ad2245de4a2119c7572c3962ad14abbf16395b2bec4064b218ee9f99d1e7c24b
4
+ data.tar.gz: b982bcc7f6626b66684db532317b0c0d35cd062aa89766ea1a230f93e7d996d6
5
5
  SHA512:
6
- metadata.gz: bec31abec6687c7b8dd57783da5e2aec175e1f538097b03eba095c49bd636fc38d562b21e3da1b2ca24f74caffb37b9953e139372ce30be19aea502947b532ca
7
- data.tar.gz: 51e593f4c14049467f657e786a2c9d54f91713d1ab45badbdd67d27876e1b61f3542936cb184cb72f0885745673b8423ce4ceda5c0b5cdb4661f96ebf2f6d59e
6
+ metadata.gz: 2df2a3763e12e2bb0c70a8f5ec3319fcf6a3210a73461dc8abb5ec2af706028403eca48cf8c589bd40122dff0cfdabce383a79b1253237e219a5c89936ec0a5e
7
+ data.tar.gz: 0ca6084649313b2c7ad32204b4c8b745f13dbf6cf2c347cb7c80ca7976b964f00feabd9666c0372e9a6c10e4ed250500ed4e202cdb687372cbd369378b7a0faa
data/.gitignore CHANGED
@@ -54,3 +54,7 @@ Gemfile.lock
54
54
 
55
55
  # Used by RuboCop. Remote config files pulled in from inherit_from directive.
56
56
  # .rubocop-https?--*
57
+
58
+
59
+ # OSX Files
60
+ .DS_Store
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.21.0
2
+ * Adds support for MPEG video files
3
+
1
4
  ## 0.20.1
2
5
  * Make sure EXIF results work correctly with ActiveSupport JSON encoders
3
6
 
data/README.md CHANGED
@@ -31,6 +31,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
31
31
  * ZIP
32
32
  * DOCX, PPTX, XLSX
33
33
  * OGG
34
+ * MPEG, MPG
34
35
 
35
36
  ...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
36
37
 
@@ -173,6 +174,10 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
173
174
  ### .docx
174
175
  - The .docx files were generated by the project maintainers
175
176
 
177
+ ### .mpg and .mpeg
178
+ - The files (video 1 to 4) were downloaded from https://standaloneinstaller.com/blog/big-list-of-sample-videos-for-testers-124.html.
179
+ - Video 5 was downloaded from https://archive.org/details/ligouHDR-HC1_sample1.
180
+
176
181
  ### JPEG examples of EXIF orientation
177
182
  - Downloaded from Unsplash (and thus freely available) - https://unsplash.com/license and have then been
178
183
  manipulated using the [exif-orientation-examples](https://github.com/recurser/exif-orientation-examples)
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.20.1'
2
+ VERSION = '0.21.0'
3
3
  end
@@ -0,0 +1,131 @@
1
+
2
+ # MPEG Headers documentation:
3
+ # http://dvd.sourceforge.net/dvdinfo/mpeghdrs.html#seq
4
+ # http://www.cs.columbia.edu/~delbert/docs/Dueck%20--%20MPEG-2%20Video%20Transcoding.pdf
5
+ # Useful tool to check the file information: https://www.metadata2go.com/
6
+ class FormatParser::MPEGParser
7
+ extend FormatParser::IOUtils
8
+
9
+ ASPECT_RATIOS = {
10
+ 1 => '1:1',
11
+ 2 => '4:3',
12
+ 3 => '16:9',
13
+ 4 => '2.21:1'
14
+ }
15
+
16
+ FRAME_RATES = {
17
+ 1 => '23.976',
18
+ 2 => '24',
19
+ 3 => '25',
20
+ 4 => '29.97',
21
+ 5 => '30',
22
+ 6 => '50',
23
+ 7 => '59.94',
24
+ 8 => '60'
25
+ }
26
+
27
+ PACK_HEADER_START_CODE = [0x00, 0x00, 0x01, 0xBA].pack('C*')
28
+ SEQUENCE_HEADER_START_CODE = [0xB3].pack('C*')
29
+ SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT = 4
30
+ SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT = 4
31
+ BYTES_TO_READ_PER_TIME = 1024
32
+
33
+ def self.likely_match?(filename)
34
+ filename =~ /\.(mpg|mpeg)$/i
35
+ end
36
+
37
+ def self.call(io)
38
+ return unless matches_mpeg_header?(io)
39
+
40
+ # We are looping through the stream because there can be several sequence headers and some of them are not useful.
41
+ # If we detect that the header is not useful, then we look for the next one, up to SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT times.
42
+ # If we reach the EOF, then the mpg is likely to be corrupted and we return nil
43
+ SEEK_FOR_SEQUENCE_HEADER_TIMES_LIMIT.times do
44
+ return if fetch_next_sequence_header_code_position(io).nil?
45
+ horizontal_size, vertical_size = parse_image_size(io)
46
+ ratio_code, rate_code = parse_rate_information(io)
47
+
48
+ if valid_aspect_ratio_code?(ratio_code) && valid_frame_rate_code?(rate_code)
49
+ return file_info(horizontal_size, vertical_size, ratio_code, rate_code)
50
+ end
51
+ end
52
+ rescue FormatParser::IOUtils::InvalidRead
53
+ nil
54
+ end
55
+
56
+ def self.file_info(width_px, height_px, ratio_code, rate_code)
57
+ FormatParser::Video.new(
58
+ format: :mpg,
59
+ width_px: width_px,
60
+ height_px: height_px,
61
+ intrinsics: {
62
+ aspect_ratio: ASPECT_RATIOS.fetch(ratio_code),
63
+ frame_rate: FRAME_RATES.fetch(rate_code)
64
+ },
65
+ )
66
+ end
67
+
68
+ # The 3 bytes following the sequence header code give us information about the px size
69
+ # 1.5 bytes (12 bits) for horizontal size and 1.5 bytes for vertical size
70
+ def self.parse_image_size(io)
71
+ image_size = convert_3_bytes_to_bits(safe_read(io, 3))
72
+ [read_first_12_bits(image_size), read_last_12_bits(image_size)]
73
+ end
74
+
75
+ # The following byte gives us information about the aspect ratio and frame rate
76
+ # 4 bits correspond to the aspect ratio and 4 bits to the frame rate code
77
+ def self.parse_rate_information(io)
78
+ rate_information = safe_read(io, 1).unpack('C').first
79
+ [read_first_4_bits(rate_information), read_last_4_bits(rate_information)]
80
+ end
81
+
82
+ def self.valid_aspect_ratio_code?(ratio_code)
83
+ ASPECT_RATIOS.include?(ratio_code)
84
+ end
85
+
86
+ def self.valid_frame_rate_code?(rate_code)
87
+ FRAME_RATES.include?(rate_code)
88
+ end
89
+
90
+ # Returns the position of the next sequence package content in the stream
91
+ # This method will read BYTES_TO_READ_PER_TIME in each loop for a maximum amount of SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT times
92
+ # If the package is not found, then it returns nil.
93
+ def self.fetch_next_sequence_header_code_position(io)
94
+ SEEK_FOR_SEQUENCE_HEADER_START_CODE_TIMES_LIMIT.times do
95
+ bytes_stream_read = io.read(BYTES_TO_READ_PER_TIME)
96
+ header_relative_index = bytes_stream_read.index(SEQUENCE_HEADER_START_CODE)
97
+ next if header_relative_index.nil?
98
+ new_io_pos = io.pos - BYTES_TO_READ_PER_TIME + header_relative_index + 1
99
+ io.seek(new_io_pos)
100
+ return new_io_pos
101
+ end
102
+ end
103
+
104
+ # If the first 4 bytes of the stream are equal to 00 00 01 BA, the pack start code for the Pack Header, then it's an MPEG file.
105
+ def self.matches_mpeg_header?(io)
106
+ safe_read(io, 4) == PACK_HEADER_START_CODE
107
+ end
108
+
109
+ def self.convert_3_bytes_to_bits(bytes)
110
+ bytes = bytes.unpack('CCC')
111
+ (bytes[0] << 16) | (bytes[1] << 8) | (bytes[2])
112
+ end
113
+
114
+ def self.read_first_12_bits(bits)
115
+ bits >> 12 & 0x0fff
116
+ end
117
+
118
+ def self.read_last_12_bits(bits)
119
+ bits & 0x0fff
120
+ end
121
+
122
+ def self.read_first_4_bits(byte)
123
+ byte >> 4
124
+ end
125
+
126
+ def self.read_last_4_bits(byte)
127
+ byte & 0x0F
128
+ end
129
+
130
+ FormatParser.register_parser self, natures: [:video], formats: [:mpg, :mpeg]
131
+ end
@@ -0,0 +1,64 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::MPEGParser do
4
+ it 'parses a first example mpg file' do
5
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video1.mpg', 'rb'))
6
+
7
+ expect(parse_result.nature).to eq(:video)
8
+ expect(parse_result.format).to eq(:mpg)
9
+ expect(parse_result.width_px).to eq(560)
10
+ expect(parse_result.height_px).to eq(320)
11
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('1:1')
12
+ expect(parse_result.intrinsics[:frame_rate]).to eq('30')
13
+ end
14
+
15
+ it 'parses a file with mpeg extension' do
16
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video2.mpeg', 'rb'))
17
+
18
+ expect(parse_result.nature).to eq(:video)
19
+ expect(parse_result.format).to eq(:mpg)
20
+ expect(parse_result.width_px).to eq(720)
21
+ expect(parse_result.height_px).to eq(480)
22
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
23
+ expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
24
+ end
25
+
26
+ it 'parses a second example mpg file' do
27
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video3.mpg', 'rb'))
28
+
29
+ expect(parse_result.nature).to eq(:video)
30
+ expect(parse_result.format).to eq(:mpg)
31
+ expect(parse_result.width_px).to eq(720)
32
+ expect(parse_result.height_px).to eq(496)
33
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('4:3')
34
+ expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
35
+ end
36
+
37
+ it 'parses a bigger mpg file' do
38
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video4.mpg', 'rb'))
39
+
40
+ expect(parse_result.nature).to eq(:video)
41
+ expect(parse_result.format).to eq(:mpg)
42
+ expect(parse_result.width_px).to eq(1920)
43
+ expect(parse_result.height_px).to eq(1080)
44
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
45
+ expect(parse_result.intrinsics[:frame_rate]).to eq('29.97')
46
+ end
47
+
48
+ it 'parses a file with different malformed first sequence header' do
49
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MPG/video5.mpg', 'rb'))
50
+
51
+ expect(parse_result.nature).to eq(:video)
52
+ expect(parse_result.format).to eq(:mpg)
53
+ expect(parse_result.width_px).to eq(1440)
54
+ expect(parse_result.height_px).to eq(1080)
55
+ expect(parse_result.intrinsics[:aspect_ratio]).to eq('16:9')
56
+ expect(parse_result.intrinsics[:frame_rate]).to eq('25')
57
+ end
58
+
59
+ it 'parses a MP4 file' do
60
+ parse_result = described_class.call(File.open(__dir__ + '/../fixtures/MOOV/MP4/bmff.mp4', 'rb'))
61
+
62
+ expect(parse_result).to be_nil
63
+ end
64
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.1
4
+ version: 0.21.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2020-01-05 00:00:00.000000000 Z
12
+ date: 2020-03-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -240,6 +240,7 @@ files:
240
240
  - lib/parsers/moov_parser/decoder.rb
241
241
  - lib/parsers/mp3_parser.rb
242
242
  - lib/parsers/mp3_parser/id3_extraction.rb
243
+ - lib/parsers/mpeg_parser.rb
243
244
  - lib/parsers/ogg_parser.rb
244
245
  - lib/parsers/pdf_parser.rb
245
246
  - lib/parsers/png_parser.rb
@@ -271,6 +272,7 @@ files:
271
272
  - spec/parsers/jpeg_parser_spec.rb
272
273
  - spec/parsers/moov_parser_spec.rb
273
274
  - spec/parsers/mp3_parser_spec.rb
275
+ - spec/parsers/mpeg_parser_spec.rb
274
276
  - spec/parsers/ogg_parser_spec.rb
275
277
  - spec/parsers/pdf_parser_spec.rb
276
278
  - spec/parsers/png_parser_spec.rb
@@ -303,7 +305,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
303
305
  - !ruby/object:Gem::Version
304
306
  version: '0'
305
307
  requirements: []
306
- rubygems_version: 3.0.6
308
+ rubygems_version: 3.0.3
307
309
  signing_key:
308
310
  specification_version: 4
309
311
  summary: A library for efficient parsing of file metadata