format_parser 0.25.5 → 0.29.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +104 -0
- data/CHANGELOG.md +15 -0
- data/README.md +4 -0
- data/format_parser.gemspec +1 -0
- data/lib/archive.rb +3 -0
- data/lib/audio.rb +3 -0
- data/lib/document.rb +1 -0
- data/lib/format_parser.rb +21 -5
- data/lib/format_parser/version.rb +1 -1
- data/lib/image.rb +3 -0
- data/lib/parsers/aiff_parser.rb +4 -1
- data/lib/parsers/bmp_parser.rb +3 -0
- data/lib/parsers/cr2_parser.rb +2 -0
- data/lib/parsers/dpx_parser.rb +6 -0
- data/lib/parsers/flac_parser.rb +2 -0
- data/lib/parsers/gif_parser.rb +2 -0
- data/lib/parsers/jpeg_parser.rb +2 -0
- data/lib/parsers/m3u_parser.rb +23 -0
- data/lib/parsers/moov_parser.rb +10 -1
- data/lib/parsers/mp3_parser.rb +3 -2
- data/lib/parsers/ogg_parser.rb +3 -2
- data/lib/parsers/pdf_parser.rb +2 -2
- data/lib/parsers/png_parser.rb +2 -0
- data/lib/parsers/psd_parser.rb +2 -0
- data/lib/parsers/tiff_parser.rb +10 -2
- data/lib/parsers/wav_parser.rb +3 -0
- data/lib/parsers/zip_parser.rb +5 -3
- data/lib/parsers/zip_parser/office_formats.rb +5 -5
- data/lib/remote_io.rb +10 -2
- data/lib/text.rb +19 -0
- data/lib/video.rb +3 -0
- data/spec/format_parser_spec.rb +20 -0
- data/spec/parsers/aiff_parser_spec.rb +1 -0
- data/spec/parsers/bmp_parser_spec.rb +8 -0
- data/spec/parsers/cr2_parser_spec.rb +1 -0
- data/spec/parsers/dpx_parser_spec.rb +1 -0
- data/spec/parsers/flac_parser_spec.rb +1 -0
- data/spec/parsers/gif_parser_spec.rb +1 -0
- data/spec/parsers/jpeg_parser_spec.rb +1 -0
- data/spec/parsers/m3u_parser_spec.rb +41 -0
- data/spec/parsers/moov_parser_spec.rb +4 -1
- data/spec/parsers/mp3_parser_spec.rb +1 -0
- data/spec/parsers/ogg_parser_spec.rb +1 -0
- data/spec/parsers/pdf_parser_spec.rb +1 -0
- data/spec/parsers/png_parser_spec.rb +1 -0
- data/spec/parsers/psd_parser_spec.rb +1 -0
- data/spec/parsers/tiff_parser_spec.rb +1 -0
- data/spec/parsers/wav_parser_spec.rb +1 -0
- data/spec/parsers/zip_parser_spec.rb +2 -0
- data/spec/remote_fetching_spec.rb +30 -0
- data/spec/remote_io_spec.rb +38 -13
- metadata +20 -3
- data/.travis.yml +0 -12
@@ -23,6 +23,7 @@ describe FormatParser::MP3Parser do
|
|
23
23
|
|
24
24
|
expect(parsed.nature).to eq(:audio)
|
25
25
|
expect(parsed.format).to eq(:mp3)
|
26
|
+
expect(parsed.content_type).to eq('audio/mpeg')
|
26
27
|
expect(parsed.num_audio_channels).to eq(2)
|
27
28
|
expect(parsed.audio_sample_rate_hz).to eq(48000)
|
28
29
|
expect(parsed.intrinsics).not_to be_nil
|
@@ -6,6 +6,7 @@ describe FormatParser::OggParser do
|
|
6
6
|
|
7
7
|
expect(parse_result.nature).to eq(:audio)
|
8
8
|
expect(parse_result.format).to eq(:ogg)
|
9
|
+
expect(parse_result.content_type).to eq('audio/ogg')
|
9
10
|
expect(parse_result.num_audio_channels).to eq(1)
|
10
11
|
expect(parse_result.audio_sample_rate_hz).to eq(16000)
|
11
12
|
expect(parse_result.media_duration_seconds).to be_within(0.01).of(2973.95)
|
@@ -59,6 +59,7 @@ describe FormatParser::TIFFParser do
|
|
59
59
|
expect(parsed.width_px).to eq(7952)
|
60
60
|
expect(parsed.height_px).to eq(5304)
|
61
61
|
expect(parsed.intrinsics[:exif]).not_to be_nil
|
62
|
+
expect(parsed.content_type).to eq('image/x-sony-arw')
|
62
63
|
end
|
63
64
|
|
64
65
|
describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
|
@@ -14,6 +14,7 @@ describe FormatParser::ZIPParser do
|
|
14
14
|
expect(result).not_to be_nil
|
15
15
|
|
16
16
|
expect(result.format).to eq(:zip)
|
17
|
+
expect(result.content_type).to eq('application/zip')
|
17
18
|
expect(result.nature).to eq(:archive)
|
18
19
|
expect(result.entries.length).to eq(0xFFFF + 1)
|
19
20
|
|
@@ -58,6 +59,7 @@ describe FormatParser::ZIPParser do
|
|
58
59
|
result = subject.call(fi_io)
|
59
60
|
expect(result.nature).to eq(:document)
|
60
61
|
expect(result.format).to eq(:docx)
|
62
|
+
expect(result.content_type).to eq('application/vnd.openxmlformats-officedocument.wordprocessingml.document')
|
61
63
|
|
62
64
|
fixture_path = fixtures_dir + '/ZIP/sample-docx.docx'
|
63
65
|
fi_io = File.open(fixture_path, 'rb')
|
@@ -15,6 +15,10 @@ describe 'Fetching data from HTTP remotes' do
|
|
15
15
|
}
|
16
16
|
@server = WEBrick::HTTPServer.new(options)
|
17
17
|
@server.mount '/', WEBrick::HTTPServlet::FileHandler, fixtures_dir
|
18
|
+
@server.mount_proc '/redirect' do |req, res|
|
19
|
+
res.status = 302
|
20
|
+
res.header['Location'] = req.path.sub('/redirect', '')
|
21
|
+
end
|
18
22
|
trap('INT') { @server.stop }
|
19
23
|
@server_thread = Thread.new { @server.start }
|
20
24
|
end
|
@@ -91,6 +95,32 @@ describe 'Fetching data from HTTP remotes' do
|
|
91
95
|
end
|
92
96
|
end
|
93
97
|
|
98
|
+
context 'when the server responds with a redirect' do
|
99
|
+
it 'follows the redirect' do
|
100
|
+
file_information = FormatParser.parse_http('http://localhost:9399/redirect/TIFF/test.tif')
|
101
|
+
expect(file_information.format).to eq(:tif)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'sends provided HTTP headers in the request' do
|
106
|
+
# Faraday is required only after calling .parse_http
|
107
|
+
# This line is just to trigger this require, then it's possible to
|
108
|
+
# add an expectation of how Faraday is initialized after.
|
109
|
+
FormatParser.parse_http('invalid_url') rescue nil
|
110
|
+
|
111
|
+
expect(Faraday)
|
112
|
+
.to receive(:new)
|
113
|
+
.with(headers: {'test-header' => 'test-value'})
|
114
|
+
.and_call_original
|
115
|
+
|
116
|
+
file_information = FormatParser.parse_http(
|
117
|
+
'http://localhost:9399//TIFF/test.tif',
|
118
|
+
headers: {'test-header' => 'test-value'}
|
119
|
+
)
|
120
|
+
|
121
|
+
expect(file_information.format).to eq(:tif)
|
122
|
+
end
|
123
|
+
|
94
124
|
after(:all) do
|
95
125
|
@server.stop
|
96
126
|
@server_thread.join(0.5)
|
data/spec/remote_io_spec.rb
CHANGED
@@ -7,7 +7,9 @@ describe FormatParser::RemoteIO do
|
|
7
7
|
rio = described_class.new('https://images.invalid/img.jpg')
|
8
8
|
|
9
9
|
fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 206, body: 'This is the response')
|
10
|
-
|
10
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
11
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
12
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
|
11
13
|
|
12
14
|
rio.seek(10)
|
13
15
|
read_result = rio.read(100)
|
@@ -18,7 +20,9 @@ describe FormatParser::RemoteIO do
|
|
18
20
|
rio = described_class.new('https://images.invalid/img.jpg')
|
19
21
|
|
20
22
|
fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 200, body: 'This is the response')
|
21
|
-
|
23
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
24
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
25
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
|
22
26
|
|
23
27
|
rio.seek(10)
|
24
28
|
read_result = rio.read(100)
|
@@ -29,7 +33,9 @@ describe FormatParser::RemoteIO do
|
|
29
33
|
rio = described_class.new('https://images.invalid/img.jpg')
|
30
34
|
|
31
35
|
fake_resp = double(headers: {}, status: 403, body: 'Please log in')
|
32
|
-
|
36
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
37
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
38
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
33
39
|
|
34
40
|
rio.seek(100)
|
35
41
|
expect { rio.read(100) }.to raise_error(/replied with a 403 and refused/)
|
@@ -39,7 +45,9 @@ describe FormatParser::RemoteIO do
|
|
39
45
|
rio = described_class.new('https://images.invalid/img.jpg')
|
40
46
|
|
41
47
|
fake_resp = double(headers: {}, status: 416, body: 'You stepped off the ledge of the range')
|
42
|
-
|
48
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
49
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
50
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
43
51
|
|
44
52
|
rio.seek(100)
|
45
53
|
expect(rio.read(100)).to be_nil
|
@@ -49,7 +57,9 @@ describe FormatParser::RemoteIO do
|
|
49
57
|
rio = described_class.new('https://images.invalid/img.jpg')
|
50
58
|
|
51
59
|
fake_resp = double(headers: {}, status: 403, body: 'Please log in')
|
52
|
-
|
60
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
61
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
62
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
53
63
|
|
54
64
|
rio.seek(100)
|
55
65
|
# rubocop: disable Lint/AmbiguousBlockAssociation
|
@@ -60,7 +70,9 @@ describe FormatParser::RemoteIO do
|
|
60
70
|
rio = described_class.new('https://images.invalid/img.jpg')
|
61
71
|
|
62
72
|
fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
63
|
-
|
73
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
74
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
75
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
64
76
|
|
65
77
|
rio.seek(100)
|
66
78
|
expect(rio.read(100)).to be_nil
|
@@ -69,15 +81,24 @@ describe FormatParser::RemoteIO do
|
|
69
81
|
it 'does not overwrite size when the range cannot be satisfied and the response is 416' do
|
70
82
|
rio = described_class.new('https://images.invalid/img.jpg')
|
71
83
|
|
72
|
-
|
73
|
-
|
84
|
+
fake_resp1 = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
|
85
|
+
fake_resp2 = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
86
|
+
|
87
|
+
faraday_conn = instance_double(Faraday::Connection)
|
88
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
89
|
+
expect(faraday_conn).to receive(:get)
|
90
|
+
.with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
|
91
|
+
.ordered
|
92
|
+
.and_return(fake_resp1)
|
93
|
+
expect(faraday_conn).to receive(:get)
|
94
|
+
.with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
95
|
+
.ordered
|
96
|
+
.and_return(fake_resp2)
|
97
|
+
|
74
98
|
rio.read(1)
|
75
99
|
|
76
100
|
expect(rio.size).to eq(13)
|
77
101
|
|
78
|
-
fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
79
|
-
expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
|
80
|
-
|
81
102
|
rio.seek(100)
|
82
103
|
expect(rio.read(100)).to be_nil
|
83
104
|
|
@@ -88,7 +109,9 @@ describe FormatParser::RemoteIO do
|
|
88
109
|
rio = described_class.new('https://images.invalid/img.jpg')
|
89
110
|
|
90
111
|
fake_resp = double(headers: {}, status: 502, body: 'Guru meditation')
|
91
|
-
|
112
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
113
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
114
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
92
115
|
|
93
116
|
rio.seek(100)
|
94
117
|
expect { rio.read(100) }.to raise_error(/replied with a 502 and we might want to retry/)
|
@@ -100,7 +123,9 @@ describe FormatParser::RemoteIO do
|
|
100
123
|
expect(rio.pos).to eq(0)
|
101
124
|
|
102
125
|
fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
|
103
|
-
|
126
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
127
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
128
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
|
104
129
|
rio.read(1)
|
105
130
|
|
106
131
|
expect(rio.pos).to eq(1)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.25.5
|
4
|
+
version: 0.29.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-02-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -73,6 +73,20 @@ dependencies:
|
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0.13'
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: faraday_middleware
|
78
|
+
requirement: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.14'
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.14'
|
76
90
|
- !ruby/object:Gem::Dependency
|
77
91
|
name: measurometer
|
78
92
|
requirement: !ruby/object:Gem::Requirement
|
@@ -183,10 +197,10 @@ executables:
|
|
183
197
|
extensions: []
|
184
198
|
extra_rdoc_files: []
|
185
199
|
files:
|
200
|
+
- ".github/workflows/main.yml"
|
186
201
|
- ".gitignore"
|
187
202
|
- ".rspec"
|
188
203
|
- ".rubocop.yml"
|
189
|
-
- ".travis.yml"
|
190
204
|
- CHANGELOG.md
|
191
205
|
- CODE_OF_CONDUCT.md
|
192
206
|
- CONTRIBUTING.md
|
@@ -219,6 +233,7 @@ files:
|
|
219
233
|
- lib/parsers/flac_parser.rb
|
220
234
|
- lib/parsers/gif_parser.rb
|
221
235
|
- lib/parsers/jpeg_parser.rb
|
236
|
+
- lib/parsers/m3u_parser.rb
|
222
237
|
- lib/parsers/moov_parser.rb
|
223
238
|
- lib/parsers/moov_parser/decoder.rb
|
224
239
|
- lib/parsers/mp3_parser.rb
|
@@ -236,6 +251,7 @@ files:
|
|
236
251
|
- lib/read_limiter.rb
|
237
252
|
- lib/read_limits_config.rb
|
238
253
|
- lib/remote_io.rb
|
254
|
+
- lib/text.rb
|
239
255
|
- lib/video.rb
|
240
256
|
- spec/active_storage/blob_io_spec.rb
|
241
257
|
- spec/active_storage/rails_app_spec.rb
|
@@ -257,6 +273,7 @@ files:
|
|
257
273
|
- spec/parsers/flac_parser_spec.rb
|
258
274
|
- spec/parsers/gif_parser_spec.rb
|
259
275
|
- spec/parsers/jpeg_parser_spec.rb
|
276
|
+
- spec/parsers/m3u_parser_spec.rb
|
260
277
|
- spec/parsers/moov_parser_spec.rb
|
261
278
|
- spec/parsers/mp3_parser_spec.rb
|
262
279
|
- spec/parsers/mpeg_parser_spec.rb
|