format_parser 2.3.0 → 2.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +13 -6
- data/format_parser.gemspec +1 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_utils.rb +18 -33
- data/lib/parsers/cr3_parser/decoder.rb +2 -2
- data/lib/parsers/cr3_parser.rb +13 -11
- data/lib/parsers/heif_parser.rb +46 -46
- data/lib/parsers/iso_base_media_file_format/box.rb +80 -0
- data/lib/parsers/iso_base_media_file_format/decoder.rb +342 -376
- data/lib/parsers/iso_base_media_file_format/utils.rb +89 -0
- data/lib/parsers/mov_parser/decoder.rb +53 -0
- data/lib/parsers/mov_parser.rb +48 -0
- data/lib/parsers/mp4_parser.rb +80 -0
- data/lib/parsers/pdf_parser.rb +5 -2
- data/lib/parsers/webp_parser.rb +2 -2
- data/spec/format_parser_spec.rb +1 -1
- data/spec/parsers/cr3_parser_spec.rb +3 -3
- data/spec/parsers/iso_base_media_file_format/box_spec.rb +399 -0
- data/spec/parsers/iso_base_media_file_format/decoder_spec.rb +53 -178
- data/spec/parsers/iso_base_media_file_format/utils_spec.rb +632 -0
- data/spec/parsers/mov_parser_spec.rb +90 -0
- data/spec/parsers/mp4_parser_spec.rb +114 -0
- data/spec/parsers/pdf_parser_spec.rb +37 -23
- metadata +25 -5
- data/lib/parsers/moov_parser/decoder.rb +0 -353
- data/lib/parsers/moov_parser.rb +0 -165
- data/spec/parsers/moov_parser_spec.rb +0 -144
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::MOVParser do
|
4
|
+
describe '#likely_match?' do
|
5
|
+
%w[mov mOv moV Mov MOv MoV MOV moov qt].each do |e|
|
6
|
+
context "with foo.#{e}" do
|
7
|
+
it 'should return true' do
|
8
|
+
expect(subject.likely_match?("foo.#{e}")).to eq(true)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
['', 'foo', 'mov', 'foomov', 'foo.mp4', 'foo.mov.bar'].each do |f|
|
14
|
+
context "with #{f}" do
|
15
|
+
it 'should return false' do
|
16
|
+
expect(subject.likely_match?(f)).to eq(false)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe '#call' do
|
23
|
+
context "when magic bytes are absent" do
|
24
|
+
let(:io) do
|
25
|
+
input = [0x10].pack('N') + 'ftyp' + 'foo ' + [0x1].pack('N')
|
26
|
+
StringIO.new(input)
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'should return nil' do
|
30
|
+
expect(subject.call(io)).to be_nil
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
Dir.glob(fixtures_dir + '/MOV/valid/**/*.*').sort.each do |path|
|
35
|
+
context "for #{path}" do
|
36
|
+
let(:result) { subject.call(File.open(path, 'rb')) }
|
37
|
+
|
38
|
+
it('should not be nil') { expect(result).not_to be_nil }
|
39
|
+
it('should have video nature') { expect(result.nature).to eq(:video) }
|
40
|
+
it('should have MOV video content type') { expect(result.content_type).to eq('video/quicktime') }
|
41
|
+
it('should have MOV video format') { expect(result.format).to eq(:mov) }
|
42
|
+
it('should have a non-zero height ') { expect(result.height_px).to be > 0 }
|
43
|
+
it('should have a non-zero width') { expect(result.width_px).to be > 0 }
|
44
|
+
it('should have a non-zero duration') { expect(result.media_duration_seconds).to be > 0 }
|
45
|
+
it('should have a non-nil frame rate') { expect(result.frame_rate).not_to be_nil }
|
46
|
+
it('should have intrinsics') { expect(result.intrinsics).not_to be_nil }
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
Dir.glob(fixtures_dir + '/MOV/invalid/**/*.*').sort.each do |path|
|
51
|
+
context "for #{path}" do
|
52
|
+
let(:result) { subject.call(File.open(path, 'rb')) }
|
53
|
+
|
54
|
+
it('should be nil') { expect(result).to be_nil }
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
context "for a standard MOV video" do
|
59
|
+
let(:result) do
|
60
|
+
path = fixtures_dir + '/MOV/valid/standard.mov'
|
61
|
+
subject.call(File.open(path, 'rb'))
|
62
|
+
end
|
63
|
+
|
64
|
+
it('should have the correct height') { expect(result.height_px).to eq(360) }
|
65
|
+
it('should have the correct width') { expect(result.width_px).to eq(640) }
|
66
|
+
it('should have the correct duration') { expect(result.media_duration_seconds.truncate(2)).to eq(9.36) }
|
67
|
+
it('should have the correct frame rate') { expect(result.frame_rate).to eq(30) }
|
68
|
+
end
|
69
|
+
|
70
|
+
context "for a scaled MOV video" do
|
71
|
+
let(:result) do
|
72
|
+
path = fixtures_dir + '/MOV/valid/scaled.mov'
|
73
|
+
subject.call(File.open(path, 'rb'))
|
74
|
+
end
|
75
|
+
|
76
|
+
it('should have the correct height') { expect(result.height_px).to eq(720) }
|
77
|
+
it('should have the correct width') { expect(result.width_px).to eq(1280) }
|
78
|
+
end
|
79
|
+
|
80
|
+
context "for a rotated MOV video" do
|
81
|
+
let(:result) do
|
82
|
+
path = fixtures_dir + '/MOV/valid/rotated.mov'
|
83
|
+
subject.call(File.open(path, 'rb'))
|
84
|
+
end
|
85
|
+
|
86
|
+
it('should have the correct height') { expect(result.height_px).to eq(640) }
|
87
|
+
it('should have the correct width') { expect(result.width_px).to eq(360) }
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::MP4Parser do
|
4
|
+
describe '#likely_match?' do
|
5
|
+
%w[mp4 mP4 Mp4 MP4 m4a m4b m4p m4r m4v].each do |e|
|
6
|
+
context "with foo.#{e}" do
|
7
|
+
it 'should return true' do
|
8
|
+
expect(subject.likely_match?("foo.#{e}")).to eq(true)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
['', 'foo', 'mp4', 'foomp4', 'foo.mp3', 'foo.mov', 'foo.mp4.bar'].each do |f|
|
14
|
+
context "with #{f}" do
|
15
|
+
it 'should return false' do
|
16
|
+
expect(subject.likely_match?(f)).to eq(false)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe '#call' do
|
23
|
+
context "when magic bytes are absent" do
|
24
|
+
let(:io) do
|
25
|
+
input = [0x10].pack('N') + 'ftyp' + 'foo ' + [0x1].pack('N')
|
26
|
+
StringIO.new(input)
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'should return nil' do
|
30
|
+
expect(subject.call(io)).to be_nil
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
Dir.glob(fixtures_dir + '/MP4/valid/video/*.*').sort.each do |path|
|
35
|
+
context "for #{path}" do
|
36
|
+
let(:result) { subject.call(File.open(path, 'rb')) }
|
37
|
+
|
38
|
+
it('should not be nil') { expect(result).not_to be_nil }
|
39
|
+
it('should have video nature') { expect(result.nature).to eq(:video) }
|
40
|
+
it('should have MP4 video content type') { expect(result.content_type).to eq('video/mp4') }
|
41
|
+
it('should have MP4 video format') { expect([:mp4, :mv4]).to include(result.format) }
|
42
|
+
it('should have a non-zero height ') { expect(result.height_px).to be > 0 }
|
43
|
+
it('should have a non-zero width') { expect(result.width_px).to be > 0 }
|
44
|
+
it('should have a non-zero duration') { expect(result.media_duration_seconds).to be > 0 }
|
45
|
+
it('should have a non-nil frame rate') { expect(result.frame_rate).not_to be_nil }
|
46
|
+
it('should have intrinsics') { expect(result.intrinsics).not_to be_nil }
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
Dir.glob(fixtures_dir + '/MP4/valid/audio/*.*').sort.each do |path|
|
51
|
+
context "for #{path}" do
|
52
|
+
let(:result) { subject.call(File.open(path, 'rb')) }
|
53
|
+
|
54
|
+
it('should not be nil') { expect(result).not_to be_nil }
|
55
|
+
it('should have audio nature') { expect(result.nature).to eq(:audio) }
|
56
|
+
it('should have MP4 audio content type') { expect(result.content_type).to eq('audio/mp4') }
|
57
|
+
it('should have MP4 audio format') { expect([:m4a, :m4b, :m4p, :m4r]).to include(result.format) }
|
58
|
+
it('should have a non-zero duration') { expect(result.media_duration_seconds).to be > 0 }
|
59
|
+
it('should have intrinsics') { expect(result.intrinsics).not_to be_nil }
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
Dir.glob(fixtures_dir + '/MP4/invalid/**/*.*').sort.each do |path|
|
64
|
+
context "for #{path}" do
|
65
|
+
let(:result) { subject.call(File.open(path, 'rb')) }
|
66
|
+
|
67
|
+
it('should be nil') { expect(result).to be_nil }
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
context "for a standard MP4 video" do
|
72
|
+
let(:result) do
|
73
|
+
path = fixtures_dir + '/MP4/valid/video/standard.mp4'
|
74
|
+
subject.call(File.open(path, 'rb'))
|
75
|
+
end
|
76
|
+
|
77
|
+
it('should have the correct height') { expect(result.height_px).to eq(360) }
|
78
|
+
it('should have the correct width') { expect(result.width_px).to eq(640) }
|
79
|
+
it('should have the correct duration') { expect(result.media_duration_seconds.truncate(2)).to eq(9.36) }
|
80
|
+
it('should have the correct frame rate') { expect(result.frame_rate).to eq(30) }
|
81
|
+
end
|
82
|
+
|
83
|
+
context "for a scaled MP4 video" do
|
84
|
+
let(:result) do
|
85
|
+
path = fixtures_dir + '/MP4/valid/video/scaled.mp4'
|
86
|
+
subject.call(File.open(path, 'rb'))
|
87
|
+
end
|
88
|
+
|
89
|
+
it('should have the correct height') { expect(result.height_px).to eq(720) }
|
90
|
+
it('should have the correct width') { expect(result.width_px).to eq(1280) }
|
91
|
+
end
|
92
|
+
|
93
|
+
context "for a rotated MP4 video" do
|
94
|
+
let(:result) do
|
95
|
+
path = fixtures_dir + '/MP4/valid/video/rotated.mp4'
|
96
|
+
subject.call(File.open(path, 'rb'))
|
97
|
+
end
|
98
|
+
|
99
|
+
it('should have the correct height') { expect(result.height_px).to eq(640) }
|
100
|
+
it('should have the correct width') { expect(result.width_px).to eq(360) }
|
101
|
+
end
|
102
|
+
|
103
|
+
context "for a multi-track MP4 video" do
|
104
|
+
let(:result) do
|
105
|
+
path = fixtures_dir + '/MP4/valid/video/multi-track.mp4'
|
106
|
+
subject.call(File.open(path, 'rb'))
|
107
|
+
end
|
108
|
+
|
109
|
+
it('should have the correct height') { expect(result.height_px).to eq(1280) }
|
110
|
+
it('should have the correct width') { expect(result.width_px).to eq(1024) }
|
111
|
+
it('should have the correct frame rate') { expect(result.frame_rate).to eq(24) }
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
@@ -1,19 +1,18 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe FormatParser::PDFParser do
|
4
|
-
|
4
|
+
def parse_pdf(pdf_filename)
|
5
5
|
subject.call(
|
6
6
|
File.open(
|
7
|
-
Pathname.new(fixtures_dir).join('PDF').join(
|
7
|
+
Pathname.new(fixtures_dir).join('PDF').join(pdf_filename),
|
8
8
|
'rb'
|
9
9
|
)
|
10
10
|
)
|
11
|
-
|
12
|
-
|
13
|
-
shared_examples :behave_like_pdf do |hash|
|
14
|
-
let(:pdf_file) { hash.fetch(:file) }
|
11
|
+
end
|
15
12
|
|
16
|
-
|
13
|
+
shared_examples :behave_like_pdf do |params|
|
14
|
+
it "#{params[:file]} acts as a pdf" do
|
15
|
+
parsed_pdf = parse_pdf params[:file]
|
17
16
|
expect(parsed_pdf).not_to be_nil
|
18
17
|
expect(parsed_pdf.nature).to eq(:document)
|
19
18
|
expect(parsed_pdf.format).to eq(:pdf)
|
@@ -21,29 +20,44 @@ describe FormatParser::PDFParser do
|
|
21
20
|
end
|
22
21
|
end
|
23
22
|
|
24
|
-
describe 'a PDF file
|
25
|
-
|
26
|
-
|
27
|
-
it 'does not parse succesfully' do
|
28
|
-
expect(parsed_pdf).to be_nil
|
23
|
+
describe 'parses a PDF file' do
|
24
|
+
describe 'a single page file' do
|
25
|
+
include_examples :behave_like_pdf, file: '1_page.pdf'
|
29
26
|
end
|
30
|
-
end
|
31
27
|
|
32
|
-
|
33
|
-
|
28
|
+
describe 'various PDF versions' do
|
29
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum PDF 1.6.pdf'
|
30
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum PDF-A-1b.pdf'
|
31
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum PDF-A-2b.pdf'
|
32
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum PDF-A-3b.pdf'
|
33
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum PDF-UA.pdf'
|
34
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum Hybrid - ODF embedded.pdf'
|
35
|
+
include_examples :behave_like_pdf, file: 'Simple PDF 2.0 file.pdf'
|
36
|
+
end
|
34
37
|
|
35
|
-
|
38
|
+
describe 'complex PDF 2.0 files' do
|
39
|
+
include_examples :behave_like_pdf, file: 'PDF 2.0 image with BPC.pdf'
|
40
|
+
include_examples :behave_like_pdf, file: 'PDF 2.0 UTF-8 string and annotation.pdf'
|
41
|
+
include_examples :behave_like_pdf, file: 'PDF 2.0 via incremental save.pdf'
|
42
|
+
include_examples :behave_like_pdf, file: 'PDF 2.0 with page level output intent.pdf'
|
43
|
+
include_examples :behave_like_pdf, file: 'pdf20-utf8-test.pdf'
|
44
|
+
end
|
36
45
|
end
|
37
46
|
|
38
|
-
describe '
|
39
|
-
|
47
|
+
describe 'broken PDF files should not parse' do
|
48
|
+
it 'PDF with missing version header' do
|
49
|
+
parsed_pdf = parse_pdf 'not_a.pdf'
|
50
|
+
expect(parsed_pdf).to be_nil
|
51
|
+
end
|
40
52
|
|
41
|
-
|
42
|
-
|
53
|
+
it 'PDF 2.0 with offset start' do
|
54
|
+
parsed_pdf = parse_pdf 'PDF 2.0 with offset start.pdf'
|
55
|
+
expect(parsed_pdf).to be_nil
|
56
|
+
end
|
43
57
|
|
44
|
-
|
45
|
-
|
46
|
-
|
58
|
+
it 'exceeds the PDF read limit' do
|
59
|
+
parsed_pdf = parse_pdf 'exceed_PDF_read_limit.pdf'
|
60
|
+
expect(parsed_pdf).to be_nil
|
47
61
|
end
|
48
62
|
end
|
49
63
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3
|
4
|
+
version: 2.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-03-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: exifr
|
@@ -39,6 +39,20 @@ dependencies:
|
|
39
39
|
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: 0.14.2
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: matrix
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
42
56
|
- !ruby/object:Gem::Dependency
|
43
57
|
name: measurometer
|
44
58
|
requirement: !ruby/object:Gem::Requirement
|
@@ -204,13 +218,16 @@ files:
|
|
204
218
|
- lib/parsers/flac_parser.rb
|
205
219
|
- lib/parsers/gif_parser.rb
|
206
220
|
- lib/parsers/heif_parser.rb
|
221
|
+
- lib/parsers/iso_base_media_file_format/box.rb
|
207
222
|
- lib/parsers/iso_base_media_file_format/decoder.rb
|
223
|
+
- lib/parsers/iso_base_media_file_format/utils.rb
|
208
224
|
- lib/parsers/jpeg_parser.rb
|
209
225
|
- lib/parsers/m3u_parser.rb
|
210
|
-
- lib/parsers/
|
211
|
-
- lib/parsers/
|
226
|
+
- lib/parsers/mov_parser.rb
|
227
|
+
- lib/parsers/mov_parser/decoder.rb
|
212
228
|
- lib/parsers/mp3_parser.rb
|
213
229
|
- lib/parsers/mp3_parser/id3_extraction.rb
|
230
|
+
- lib/parsers/mp4_parser.rb
|
214
231
|
- lib/parsers/mpeg_parser.rb
|
215
232
|
- lib/parsers/nef_parser.rb
|
216
233
|
- lib/parsers/ogg_parser.rb
|
@@ -254,11 +271,14 @@ files:
|
|
254
271
|
- spec/parsers/flac_parser_spec.rb
|
255
272
|
- spec/parsers/gif_parser_spec.rb
|
256
273
|
- spec/parsers/heif_parser_spec.rb
|
274
|
+
- spec/parsers/iso_base_media_file_format/box_spec.rb
|
257
275
|
- spec/parsers/iso_base_media_file_format/decoder_spec.rb
|
276
|
+
- spec/parsers/iso_base_media_file_format/utils_spec.rb
|
258
277
|
- spec/parsers/jpeg_parser_spec.rb
|
259
278
|
- spec/parsers/m3u_parser_spec.rb
|
260
|
-
- spec/parsers/
|
279
|
+
- spec/parsers/mov_parser_spec.rb
|
261
280
|
- spec/parsers/mp3_parser_spec.rb
|
281
|
+
- spec/parsers/mp4_parser_spec.rb
|
262
282
|
- spec/parsers/mpeg_parser_spec.rb
|
263
283
|
- spec/parsers/nef_parser_spec.rb
|
264
284
|
- spec/parsers/ogg_parser_spec.rb
|