format_parser 0.25.4 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +104 -0
- data/CHANGELOG.md +15 -0
- data/README.md +4 -0
- data/format_parser.gemspec +1 -0
- data/lib/archive.rb +3 -0
- data/lib/audio.rb +3 -0
- data/lib/document.rb +1 -0
- data/lib/format_parser.rb +18 -3
- data/lib/format_parser/version.rb +1 -1
- data/lib/image.rb +3 -0
- data/lib/parsers/aiff_parser.rb +4 -1
- data/lib/parsers/bmp_parser.rb +3 -0
- data/lib/parsers/cr2_parser.rb +2 -0
- data/lib/parsers/dpx_parser.rb +19 -8
- data/lib/parsers/flac_parser.rb +2 -0
- data/lib/parsers/gif_parser.rb +2 -0
- data/lib/parsers/jpeg_parser.rb +2 -0
- data/lib/parsers/m3u_parser.rb +23 -0
- data/lib/parsers/moov_parser.rb +10 -1
- data/lib/parsers/mp3_parser.rb +3 -2
- data/lib/parsers/ogg_parser.rb +3 -2
- data/lib/parsers/pdf_parser.rb +2 -2
- data/lib/parsers/png_parser.rb +2 -0
- data/lib/parsers/psd_parser.rb +2 -0
- data/lib/parsers/tiff_parser.rb +10 -2
- data/lib/parsers/wav_parser.rb +3 -0
- data/lib/parsers/zip_parser.rb +5 -3
- data/lib/parsers/zip_parser/office_formats.rb +5 -5
- data/lib/remote_io.rb +7 -1
- data/lib/text.rb +19 -0
- data/lib/video.rb +3 -0
- data/spec/format_parser_spec.rb +20 -0
- data/spec/parsers/aiff_parser_spec.rb +1 -0
- data/spec/parsers/bmp_parser_spec.rb +8 -0
- data/spec/parsers/cr2_parser_spec.rb +1 -0
- data/spec/parsers/dpx_parser_spec.rb +1 -0
- data/spec/parsers/flac_parser_spec.rb +1 -0
- data/spec/parsers/gif_parser_spec.rb +1 -0
- data/spec/parsers/jpeg_parser_spec.rb +1 -0
- data/spec/parsers/m3u_parser_spec.rb +41 -0
- data/spec/parsers/moov_parser_spec.rb +4 -1
- data/spec/parsers/mp3_parser_spec.rb +1 -0
- data/spec/parsers/ogg_parser_spec.rb +1 -0
- data/spec/parsers/pdf_parser_spec.rb +1 -0
- data/spec/parsers/png_parser_spec.rb +1 -0
- data/spec/parsers/psd_parser_spec.rb +1 -0
- data/spec/parsers/tiff_parser_spec.rb +1 -0
- data/spec/parsers/wav_parser_spec.rb +1 -0
- data/spec/parsers/zip_parser_spec.rb +2 -0
- data/spec/remote_fetching_spec.rb +11 -0
- data/spec/remote_io_spec.rb +38 -13
- metadata +21 -4
- data/.travis.yml +0 -12
@@ -23,6 +23,7 @@ describe FormatParser::MP3Parser do
|
|
23
23
|
|
24
24
|
expect(parsed.nature).to eq(:audio)
|
25
25
|
expect(parsed.format).to eq(:mp3)
|
26
|
+
expect(parsed.content_type).to eq('audio/mpeg')
|
26
27
|
expect(parsed.num_audio_channels).to eq(2)
|
27
28
|
expect(parsed.audio_sample_rate_hz).to eq(48000)
|
28
29
|
expect(parsed.intrinsics).not_to be_nil
|
@@ -6,6 +6,7 @@ describe FormatParser::OggParser do
|
|
6
6
|
|
7
7
|
expect(parse_result.nature).to eq(:audio)
|
8
8
|
expect(parse_result.format).to eq(:ogg)
|
9
|
+
expect(parse_result.content_type).to eq('audio/ogg')
|
9
10
|
expect(parse_result.num_audio_channels).to eq(1)
|
10
11
|
expect(parse_result.audio_sample_rate_hz).to eq(16000)
|
11
12
|
expect(parse_result.media_duration_seconds).to be_within(0.01).of(2973.95)
|
@@ -59,6 +59,7 @@ describe FormatParser::TIFFParser do
|
|
59
59
|
expect(parsed.width_px).to eq(7952)
|
60
60
|
expect(parsed.height_px).to eq(5304)
|
61
61
|
expect(parsed.intrinsics[:exif]).not_to be_nil
|
62
|
+
expect(parsed.content_type).to eq('image/x-sony-arw')
|
62
63
|
end
|
63
64
|
|
64
65
|
describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
|
@@ -14,6 +14,7 @@ describe FormatParser::ZIPParser do
|
|
14
14
|
expect(result).not_to be_nil
|
15
15
|
|
16
16
|
expect(result.format).to eq(:zip)
|
17
|
+
expect(result.content_type).to eq('application/zip')
|
17
18
|
expect(result.nature).to eq(:archive)
|
18
19
|
expect(result.entries.length).to eq(0xFFFF + 1)
|
19
20
|
|
@@ -58,6 +59,7 @@ describe FormatParser::ZIPParser do
|
|
58
59
|
result = subject.call(fi_io)
|
59
60
|
expect(result.nature).to eq(:document)
|
60
61
|
expect(result.format).to eq(:docx)
|
62
|
+
expect(result.content_type).to eq('application/vnd.openxmlformats-officedocument.wordprocessingml.document')
|
61
63
|
|
62
64
|
fixture_path = fixtures_dir + '/ZIP/sample-docx.docx'
|
63
65
|
fi_io = File.open(fixture_path, 'rb')
|
@@ -15,6 +15,10 @@ describe 'Fetching data from HTTP remotes' do
|
|
15
15
|
}
|
16
16
|
@server = WEBrick::HTTPServer.new(options)
|
17
17
|
@server.mount '/', WEBrick::HTTPServlet::FileHandler, fixtures_dir
|
18
|
+
@server.mount_proc '/redirect' do |req, res|
|
19
|
+
res.status = 302
|
20
|
+
res.header['Location'] = req.path.sub('/redirect', '')
|
21
|
+
end
|
18
22
|
trap('INT') { @server.stop }
|
19
23
|
@server_thread = Thread.new { @server.start }
|
20
24
|
end
|
@@ -91,6 +95,13 @@ describe 'Fetching data from HTTP remotes' do
|
|
91
95
|
end
|
92
96
|
end
|
93
97
|
|
98
|
+
context 'when the server responds with a redirect' do
|
99
|
+
it 'follows the redirect' do
|
100
|
+
file_information = FormatParser.parse_http('http://localhost:9399/redirect/TIFF/test.tif')
|
101
|
+
expect(file_information.format).to eq(:tif)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
94
105
|
after(:all) do
|
95
106
|
@server.stop
|
96
107
|
@server_thread.join(0.5)
|
data/spec/remote_io_spec.rb
CHANGED
@@ -7,7 +7,9 @@ describe FormatParser::RemoteIO do
|
|
7
7
|
rio = described_class.new('https://images.invalid/img.jpg')
|
8
8
|
|
9
9
|
fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 206, body: 'This is the response')
|
10
|
-
|
10
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
11
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
12
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
|
11
13
|
|
12
14
|
rio.seek(10)
|
13
15
|
read_result = rio.read(100)
|
@@ -18,7 +20,9 @@ describe FormatParser::RemoteIO do
|
|
18
20
|
rio = described_class.new('https://images.invalid/img.jpg')
|
19
21
|
|
20
22
|
fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 200, body: 'This is the response')
|
21
|
-
|
23
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
24
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
25
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
|
22
26
|
|
23
27
|
rio.seek(10)
|
24
28
|
read_result = rio.read(100)
|
@@ -29,7 +33,9 @@ describe FormatParser::RemoteIO do
|
|
29
33
|
rio = described_class.new('https://images.invalid/img.jpg')
|
30
34
|
|
31
35
|
fake_resp = double(headers: {}, status: 403, body: 'Please log in')
|
32
|
-
|
36
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
37
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
38
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
33
39
|
|
34
40
|
rio.seek(100)
|
35
41
|
expect { rio.read(100) }.to raise_error(/replied with a 403 and refused/)
|
@@ -39,7 +45,9 @@ describe FormatParser::RemoteIO do
|
|
39
45
|
rio = described_class.new('https://images.invalid/img.jpg')
|
40
46
|
|
41
47
|
fake_resp = double(headers: {}, status: 416, body: 'You stepped off the ledge of the range')
|
42
|
-
|
48
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
49
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
50
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
43
51
|
|
44
52
|
rio.seek(100)
|
45
53
|
expect(rio.read(100)).to be_nil
|
@@ -49,7 +57,9 @@ describe FormatParser::RemoteIO do
|
|
49
57
|
rio = described_class.new('https://images.invalid/img.jpg')
|
50
58
|
|
51
59
|
fake_resp = double(headers: {}, status: 403, body: 'Please log in')
|
52
|
-
|
60
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
61
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
62
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
53
63
|
|
54
64
|
rio.seek(100)
|
55
65
|
# rubocop: disable Lint/AmbiguousBlockAssociation
|
@@ -60,7 +70,9 @@ describe FormatParser::RemoteIO do
|
|
60
70
|
rio = described_class.new('https://images.invalid/img.jpg')
|
61
71
|
|
62
72
|
fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
63
|
-
|
73
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
74
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
75
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
64
76
|
|
65
77
|
rio.seek(100)
|
66
78
|
expect(rio.read(100)).to be_nil
|
@@ -69,15 +81,24 @@ describe FormatParser::RemoteIO do
|
|
69
81
|
it 'does not overwrite size when the range cannot be satisfied and the response is 416' do
|
70
82
|
rio = described_class.new('https://images.invalid/img.jpg')
|
71
83
|
|
72
|
-
|
73
|
-
|
84
|
+
fake_resp1 = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
|
85
|
+
fake_resp2 = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
86
|
+
|
87
|
+
faraday_conn = instance_double(Faraday::Connection)
|
88
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
89
|
+
expect(faraday_conn).to receive(:get)
|
90
|
+
.with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
|
91
|
+
.ordered
|
92
|
+
.and_return(fake_resp1)
|
93
|
+
expect(faraday_conn).to receive(:get)
|
94
|
+
.with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
95
|
+
.ordered
|
96
|
+
.and_return(fake_resp2)
|
97
|
+
|
74
98
|
rio.read(1)
|
75
99
|
|
76
100
|
expect(rio.size).to eq(13)
|
77
101
|
|
78
|
-
fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
79
|
-
expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
|
80
|
-
|
81
102
|
rio.seek(100)
|
82
103
|
expect(rio.read(100)).to be_nil
|
83
104
|
|
@@ -88,7 +109,9 @@ describe FormatParser::RemoteIO do
|
|
88
109
|
rio = described_class.new('https://images.invalid/img.jpg')
|
89
110
|
|
90
111
|
fake_resp = double(headers: {}, status: 502, body: 'Guru meditation')
|
91
|
-
|
112
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
113
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
114
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
92
115
|
|
93
116
|
rio.seek(100)
|
94
117
|
expect { rio.read(100) }.to raise_error(/replied with a 502 and we might want to retry/)
|
@@ -100,7 +123,9 @@ describe FormatParser::RemoteIO do
|
|
100
123
|
expect(rio.pos).to eq(0)
|
101
124
|
|
102
125
|
fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
|
103
|
-
|
126
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
127
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
128
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
|
104
129
|
rio.read(1)
|
105
130
|
|
106
131
|
expect(rio.pos).to eq(1)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.25.4
|
4
|
+
version: 0.28.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-02-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -73,6 +73,20 @@ dependencies:
|
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0.13'
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: faraday_middleware
|
78
|
+
requirement: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.14'
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.14'
|
76
90
|
- !ruby/object:Gem::Dependency
|
77
91
|
name: measurometer
|
78
92
|
requirement: !ruby/object:Gem::Requirement
|
@@ -183,10 +197,10 @@ executables:
|
|
183
197
|
extensions: []
|
184
198
|
extra_rdoc_files: []
|
185
199
|
files:
|
200
|
+
- ".github/workflows/main.yml"
|
186
201
|
- ".gitignore"
|
187
202
|
- ".rspec"
|
188
203
|
- ".rubocop.yml"
|
189
|
-
- ".travis.yml"
|
190
204
|
- CHANGELOG.md
|
191
205
|
- CODE_OF_CONDUCT.md
|
192
206
|
- CONTRIBUTING.md
|
@@ -219,6 +233,7 @@ files:
|
|
219
233
|
- lib/parsers/flac_parser.rb
|
220
234
|
- lib/parsers/gif_parser.rb
|
221
235
|
- lib/parsers/jpeg_parser.rb
|
236
|
+
- lib/parsers/m3u_parser.rb
|
222
237
|
- lib/parsers/moov_parser.rb
|
223
238
|
- lib/parsers/moov_parser/decoder.rb
|
224
239
|
- lib/parsers/mp3_parser.rb
|
@@ -236,6 +251,7 @@ files:
|
|
236
251
|
- lib/read_limiter.rb
|
237
252
|
- lib/read_limits_config.rb
|
238
253
|
- lib/remote_io.rb
|
254
|
+
- lib/text.rb
|
239
255
|
- lib/video.rb
|
240
256
|
- spec/active_storage/blob_io_spec.rb
|
241
257
|
- spec/active_storage/rails_app_spec.rb
|
@@ -257,6 +273,7 @@ files:
|
|
257
273
|
- spec/parsers/flac_parser_spec.rb
|
258
274
|
- spec/parsers/gif_parser_spec.rb
|
259
275
|
- spec/parsers/jpeg_parser_spec.rb
|
276
|
+
- spec/parsers/m3u_parser_spec.rb
|
260
277
|
- spec/parsers/moov_parser_spec.rb
|
261
278
|
- spec/parsers/mp3_parser_spec.rb
|
262
279
|
- spec/parsers/mpeg_parser_spec.rb
|
@@ -292,7 +309,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
292
309
|
- !ruby/object:Gem::Version
|
293
310
|
version: '0'
|
294
311
|
requirements: []
|
295
|
-
rubygems_version: 3.
|
312
|
+
rubygems_version: 3.0.3
|
296
313
|
signing_key:
|
297
314
|
specification_version: 4
|
298
315
|
summary: A library for efficient parsing of file metadata
|