format_parser 2.3.0 → 2.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/README.md +13 -6
- data/format_parser.gemspec +1 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_utils.rb +18 -33
- data/lib/parsers/cr3_parser/decoder.rb +2 -2
- data/lib/parsers/cr3_parser.rb +13 -11
- data/lib/parsers/heif_parser.rb +46 -46
- data/lib/parsers/iso_base_media_file_format/box.rb +80 -0
- data/lib/parsers/iso_base_media_file_format/decoder.rb +348 -377
- data/lib/parsers/iso_base_media_file_format/utils.rb +89 -0
- data/lib/parsers/mov_parser/decoder.rb +53 -0
- data/lib/parsers/mov_parser.rb +48 -0
- data/lib/parsers/mp4_parser.rb +80 -0
- data/lib/parsers/pdf_parser.rb +5 -2
- data/lib/parsers/webp_parser.rb +2 -2
- data/spec/format_parser_spec.rb +1 -1
- data/spec/parsers/cr3_parser_spec.rb +3 -3
- data/spec/parsers/iso_base_media_file_format/box_spec.rb +399 -0
- data/spec/parsers/iso_base_media_file_format/decoder_spec.rb +117 -151
- data/spec/parsers/iso_base_media_file_format/utils_spec.rb +632 -0
- data/spec/parsers/mov_parser_spec.rb +139 -0
- data/spec/parsers/mp4_parser_spec.rb +188 -0
- data/spec/parsers/pdf_parser_spec.rb +37 -23
- metadata +25 -5
- data/lib/parsers/moov_parser/decoder.rb +0 -353
- data/lib/parsers/moov_parser.rb +0 -165
- data/spec/parsers/moov_parser_spec.rb +0 -144
@@ -0,0 +1,139 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::MOVParser do
|
4
|
+
describe '#likely_match?' do
|
5
|
+
%w[mov mOv moV Mov MOv MoV MOV moov qt].each do |e|
|
6
|
+
context "with foo.#{e}" do
|
7
|
+
it 'should return true' do
|
8
|
+
expect(subject.likely_match?("foo.#{e}")).to eq(true)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
['', 'foo', 'mov', 'foomov', 'foo.mp4', 'foo.mov.bar'].each do |f|
|
14
|
+
context "with #{f}" do
|
15
|
+
it 'should return false' do
|
16
|
+
expect(subject.likely_match?(f)).to eq(false)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe '#call' do
|
23
|
+
context "when magic bytes are absent" do
|
24
|
+
let(:io) do
|
25
|
+
input = [0x10].pack('N') + 'ftyp' + 'foo ' + [0x1].pack('N')
|
26
|
+
StringIO.new(input)
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'should return nil' do
|
30
|
+
expect(subject.call(io)).to be_nil
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
Dir.glob(fixtures_dir + '/MOV/valid/**/*.*').sort.each do |path|
|
35
|
+
context "for #{path}" do
|
36
|
+
let(:result) { subject.call(File.open(path, 'rb')) }
|
37
|
+
|
38
|
+
it 'should not be nil' do
|
39
|
+
expect(result).not_to be_nil
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'should have video nature' do
|
43
|
+
expect(result.nature).to eq(:video)
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'should have MOV video content type' do
|
47
|
+
expect(result.content_type).to eq('video/quicktime')
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'should have MOV video format' do
|
51
|
+
expect(result.format).to eq(:mov)
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'should have a non-zero height ' do
|
55
|
+
expect(result.height_px).to be > 0
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'should have a non-zero width' do
|
59
|
+
expect(result.width_px).to be > 0
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'should have a non-zero duration' do
|
63
|
+
expect(result.media_duration_seconds).to be > 0
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'should have a non-nil frame rate' do
|
67
|
+
expect(result.frame_rate).not_to be_nil
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'should have intrinsics' do
|
71
|
+
expect(result.intrinsics).not_to be_nil
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
Dir.glob(fixtures_dir + '/MOV/invalid/**/*.*').sort.each do |path|
|
77
|
+
context "for #{path}" do
|
78
|
+
let(:result) { subject.call(File.open(path, 'rb')) }
|
79
|
+
|
80
|
+
it 'should be nil' do
|
81
|
+
expect(result).to be_nil
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
context "for a standard MOV video" do
|
87
|
+
let(:result) do
|
88
|
+
path = fixtures_dir + '/MOV/valid/standard.mov'
|
89
|
+
subject.call(File.open(path, 'rb'))
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'should have the correct height' do
|
93
|
+
expect(result.height_px).to eq(360)
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'should have the correct width' do
|
97
|
+
expect(result.width_px).to eq(640)
|
98
|
+
end
|
99
|
+
|
100
|
+
it 'should have the correct duration' do
|
101
|
+
expect(result.media_duration_seconds.truncate(2)).to eq(9.36)
|
102
|
+
end
|
103
|
+
|
104
|
+
it 'should have the correct frame rate' do
|
105
|
+
expect(result.frame_rate).to eq(30)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
context "for a scaled MOV video" do
|
110
|
+
let(:result) do
|
111
|
+
path = fixtures_dir + '/MOV/valid/scaled.mov'
|
112
|
+
subject.call(File.open(path, 'rb'))
|
113
|
+
end
|
114
|
+
|
115
|
+
it 'should have the correct height' do
|
116
|
+
expect(result.height_px).to eq(720)
|
117
|
+
end
|
118
|
+
|
119
|
+
it 'should have the correct width' do
|
120
|
+
expect(result.width_px).to eq(1280)
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
context "for a rotated MOV video" do
|
125
|
+
let(:result) do
|
126
|
+
path = fixtures_dir + '/MOV/valid/rotated.mov'
|
127
|
+
subject.call(File.open(path, 'rb'))
|
128
|
+
end
|
129
|
+
|
130
|
+
it 'should have the correct height' do
|
131
|
+
expect(result.height_px).to eq(640)
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'should have the correct width' do
|
135
|
+
expect(result.width_px).to eq(360)
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
@@ -0,0 +1,188 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::MP4Parser do
|
4
|
+
describe '#likely_match?' do
|
5
|
+
%w[mp4 mP4 Mp4 MP4 m4a m4b m4p m4r m4v].each do |e|
|
6
|
+
context "with foo.#{e}" do
|
7
|
+
it 'should return true' do
|
8
|
+
expect(subject.likely_match?("foo.#{e}")).to eq(true)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
['', 'foo', 'mp4', 'foomp4', 'foo.mp3', 'foo.mov', 'foo.mp4.bar'].each do |f|
|
14
|
+
context "with #{f}" do
|
15
|
+
it 'should return false' do
|
16
|
+
expect(subject.likely_match?(f)).to eq(false)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe '#call' do
|
23
|
+
context "when magic bytes are absent" do
|
24
|
+
let(:io) do
|
25
|
+
input = [0x10].pack('N') + 'ftyp' + 'foo ' + [0x1].pack('N')
|
26
|
+
StringIO.new(input)
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'should return nil' do
|
30
|
+
expect(subject.call(io)).to be_nil
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
Dir.glob(fixtures_dir + '/MP4/valid/video/*.*').sort.each do |path|
|
35
|
+
context "for #{path}" do
|
36
|
+
let(:result) { subject.call(File.open(path, 'rb')) }
|
37
|
+
|
38
|
+
it 'should not be nil' do
|
39
|
+
expect(result).not_to be_nil
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'should have video nature' do
|
43
|
+
expect(result.nature).to eq(:video)
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'should have MP4 video content type' do
|
47
|
+
expect(result.content_type).to eq('video/mp4')
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'should have MP4 video format' do
|
51
|
+
expect([:mp4, :mv4]).to include(result.format)
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'should have a non-zero height ' do
|
55
|
+
expect(result.height_px).to be > 0
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'should have a non-zero width' do
|
59
|
+
expect(result.width_px).to be > 0
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'should have a non-zero duration' do
|
63
|
+
expect(result.media_duration_seconds).to be > 0
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'should have a non-nil frame rate' do
|
67
|
+
expect(result.frame_rate).not_to be_nil
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'should have intrinsics' do
|
71
|
+
expect(result.intrinsics).not_to be_nil
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
Dir.glob(fixtures_dir + '/MP4/valid/audio/*.*').sort.each do |path|
|
77
|
+
context "for #{path}" do
|
78
|
+
let(:result) { subject.call(File.open(path, 'rb')) }
|
79
|
+
|
80
|
+
it 'should not be nil' do
|
81
|
+
expect(result).not_to be_nil
|
82
|
+
end
|
83
|
+
|
84
|
+
it 'should have audio nature' do
|
85
|
+
expect(result.nature).to eq(:audio)
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'should have MP4 audio content type' do
|
89
|
+
expect(result.content_type).to eq('audio/mp4')
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'should have MP4 audio format' do
|
93
|
+
expect([:m4a, :m4b, :m4p, :m4r]).to include(result.format)
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'should have a non-zero duration' do
|
97
|
+
expect(result.media_duration_seconds).to be > 0
|
98
|
+
end
|
99
|
+
|
100
|
+
it 'should have intrinsics' do
|
101
|
+
expect(result.intrinsics).not_to be_nil
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
Dir.glob(fixtures_dir + '/MP4/invalid/**/*.*').sort.each do |path|
|
107
|
+
context "for #{path}" do
|
108
|
+
let(:result) { subject.call(File.open(path, 'rb')) }
|
109
|
+
|
110
|
+
it 'should be nil' do
|
111
|
+
expect(result).to be_nil
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
context "for a standard MP4 video" do
|
117
|
+
let(:result) do
|
118
|
+
path = fixtures_dir + '/MP4/valid/video/standard.mp4'
|
119
|
+
subject.call(File.open(path, 'rb'))
|
120
|
+
end
|
121
|
+
|
122
|
+
it 'should have the correct height' do
|
123
|
+
expect(result.height_px).to eq(360)
|
124
|
+
end
|
125
|
+
|
126
|
+
it 'should have the correct width' do
|
127
|
+
expect(result.width_px).to eq(640)
|
128
|
+
end
|
129
|
+
|
130
|
+
it 'should have the correct duration' do
|
131
|
+
expect(result.media_duration_seconds.truncate(2)).to eq(9.36)
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'should have the correct frame rate' do
|
135
|
+
expect(result.frame_rate).to eq(30)
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
context "for a scaled MP4 video" do
|
140
|
+
let(:result) do
|
141
|
+
path = fixtures_dir + '/MP4/valid/video/scaled.mp4'
|
142
|
+
subject.call(File.open(path, 'rb'))
|
143
|
+
end
|
144
|
+
|
145
|
+
it 'should have the correct height' do
|
146
|
+
expect(result.height_px).to eq(720)
|
147
|
+
end
|
148
|
+
|
149
|
+
it 'should have the correct width' do
|
150
|
+
expect(result.width_px).to eq(1280)
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
context "for a rotated MP4 video" do
|
155
|
+
let(:result) do
|
156
|
+
path = fixtures_dir + '/MP4/valid/video/rotated.mp4'
|
157
|
+
subject.call(File.open(path, 'rb'))
|
158
|
+
end
|
159
|
+
|
160
|
+
it 'should have the correct height' do
|
161
|
+
expect(result.height_px).to eq(640)
|
162
|
+
end
|
163
|
+
|
164
|
+
it 'should have the correct width' do
|
165
|
+
expect(result.width_px).to eq(360)
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
context "for a multi-track MP4 video" do
|
170
|
+
let(:result) do
|
171
|
+
path = fixtures_dir + '/MP4/valid/video/multi-track.mp4'
|
172
|
+
subject.call(File.open(path, 'rb'))
|
173
|
+
end
|
174
|
+
|
175
|
+
it 'should have the correct height' do
|
176
|
+
expect(result.height_px).to eq(1280)
|
177
|
+
end
|
178
|
+
|
179
|
+
it 'should have the correct width' do
|
180
|
+
expect(result.width_px).to eq(1024)
|
181
|
+
end
|
182
|
+
|
183
|
+
it 'should have the correct frame rate' do
|
184
|
+
expect(result.frame_rate).to eq(24)
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
@@ -1,19 +1,18 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe FormatParser::PDFParser do
|
4
|
-
|
4
|
+
def parse_pdf(pdf_filename)
|
5
5
|
subject.call(
|
6
6
|
File.open(
|
7
|
-
Pathname.new(fixtures_dir).join('PDF').join(
|
7
|
+
Pathname.new(fixtures_dir).join('PDF').join(pdf_filename),
|
8
8
|
'rb'
|
9
9
|
)
|
10
10
|
)
|
11
|
-
|
12
|
-
|
13
|
-
shared_examples :behave_like_pdf do |hash|
|
14
|
-
let(:pdf_file) { hash.fetch(:file) }
|
11
|
+
end
|
15
12
|
|
16
|
-
|
13
|
+
shared_examples :behave_like_pdf do |params|
|
14
|
+
it "#{params[:file]} acts as a pdf" do
|
15
|
+
parsed_pdf = parse_pdf params[:file]
|
17
16
|
expect(parsed_pdf).not_to be_nil
|
18
17
|
expect(parsed_pdf.nature).to eq(:document)
|
19
18
|
expect(parsed_pdf.format).to eq(:pdf)
|
@@ -21,29 +20,44 @@ describe FormatParser::PDFParser do
|
|
21
20
|
end
|
22
21
|
end
|
23
22
|
|
24
|
-
describe 'a PDF file
|
25
|
-
|
26
|
-
|
27
|
-
it 'does not parse succesfully' do
|
28
|
-
expect(parsed_pdf).to be_nil
|
23
|
+
describe 'parses a PDF file' do
|
24
|
+
describe 'a single page file' do
|
25
|
+
include_examples :behave_like_pdf, file: '1_page.pdf'
|
29
26
|
end
|
30
|
-
end
|
31
27
|
|
32
|
-
|
33
|
-
|
28
|
+
describe 'various PDF versions' do
|
29
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum PDF 1.6.pdf'
|
30
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum PDF-A-1b.pdf'
|
31
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum PDF-A-2b.pdf'
|
32
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum PDF-A-3b.pdf'
|
33
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum PDF-UA.pdf'
|
34
|
+
include_examples :behave_like_pdf, file: 'Lorem Ipsum Hybrid - ODF embedded.pdf'
|
35
|
+
include_examples :behave_like_pdf, file: 'Simple PDF 2.0 file.pdf'
|
36
|
+
end
|
34
37
|
|
35
|
-
|
38
|
+
describe 'complex PDF 2.0 files' do
|
39
|
+
include_examples :behave_like_pdf, file: 'PDF 2.0 image with BPC.pdf'
|
40
|
+
include_examples :behave_like_pdf, file: 'PDF 2.0 UTF-8 string and annotation.pdf'
|
41
|
+
include_examples :behave_like_pdf, file: 'PDF 2.0 via incremental save.pdf'
|
42
|
+
include_examples :behave_like_pdf, file: 'PDF 2.0 with page level output intent.pdf'
|
43
|
+
include_examples :behave_like_pdf, file: 'pdf20-utf8-test.pdf'
|
44
|
+
end
|
36
45
|
end
|
37
46
|
|
38
|
-
describe '
|
39
|
-
|
47
|
+
describe 'broken PDF files should not parse' do
|
48
|
+
it 'PDF with missing version header' do
|
49
|
+
parsed_pdf = parse_pdf 'not_a.pdf'
|
50
|
+
expect(parsed_pdf).to be_nil
|
51
|
+
end
|
40
52
|
|
41
|
-
|
42
|
-
|
53
|
+
it 'PDF 2.0 with offset start' do
|
54
|
+
parsed_pdf = parse_pdf 'PDF 2.0 with offset start.pdf'
|
55
|
+
expect(parsed_pdf).to be_nil
|
56
|
+
end
|
43
57
|
|
44
|
-
|
45
|
-
|
46
|
-
|
58
|
+
it 'exceeds the PDF read limit' do
|
59
|
+
parsed_pdf = parse_pdf 'exceed_PDF_read_limit.pdf'
|
60
|
+
expect(parsed_pdf).to be_nil
|
47
61
|
end
|
48
62
|
end
|
49
63
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.4.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-03-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: exifr
|
@@ -39,6 +39,20 @@ dependencies:
|
|
39
39
|
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: 0.14.2
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: matrix
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
42
56
|
- !ruby/object:Gem::Dependency
|
43
57
|
name: measurometer
|
44
58
|
requirement: !ruby/object:Gem::Requirement
|
@@ -204,13 +218,16 @@ files:
|
|
204
218
|
- lib/parsers/flac_parser.rb
|
205
219
|
- lib/parsers/gif_parser.rb
|
206
220
|
- lib/parsers/heif_parser.rb
|
221
|
+
- lib/parsers/iso_base_media_file_format/box.rb
|
207
222
|
- lib/parsers/iso_base_media_file_format/decoder.rb
|
223
|
+
- lib/parsers/iso_base_media_file_format/utils.rb
|
208
224
|
- lib/parsers/jpeg_parser.rb
|
209
225
|
- lib/parsers/m3u_parser.rb
|
210
|
-
- lib/parsers/
|
211
|
-
- lib/parsers/
|
226
|
+
- lib/parsers/mov_parser.rb
|
227
|
+
- lib/parsers/mov_parser/decoder.rb
|
212
228
|
- lib/parsers/mp3_parser.rb
|
213
229
|
- lib/parsers/mp3_parser/id3_extraction.rb
|
230
|
+
- lib/parsers/mp4_parser.rb
|
214
231
|
- lib/parsers/mpeg_parser.rb
|
215
232
|
- lib/parsers/nef_parser.rb
|
216
233
|
- lib/parsers/ogg_parser.rb
|
@@ -254,11 +271,14 @@ files:
|
|
254
271
|
- spec/parsers/flac_parser_spec.rb
|
255
272
|
- spec/parsers/gif_parser_spec.rb
|
256
273
|
- spec/parsers/heif_parser_spec.rb
|
274
|
+
- spec/parsers/iso_base_media_file_format/box_spec.rb
|
257
275
|
- spec/parsers/iso_base_media_file_format/decoder_spec.rb
|
276
|
+
- spec/parsers/iso_base_media_file_format/utils_spec.rb
|
258
277
|
- spec/parsers/jpeg_parser_spec.rb
|
259
278
|
- spec/parsers/m3u_parser_spec.rb
|
260
|
-
- spec/parsers/
|
279
|
+
- spec/parsers/mov_parser_spec.rb
|
261
280
|
- spec/parsers/mp3_parser_spec.rb
|
281
|
+
- spec/parsers/mp4_parser_spec.rb
|
262
282
|
- spec/parsers/mpeg_parser_spec.rb
|
263
283
|
- spec/parsers/nef_parser_spec.rb
|
264
284
|
- spec/parsers/ogg_parser_spec.rb
|