format_parser 0.25.4 → 0.28.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +104 -0
  3. data/CHANGELOG.md +15 -0
  4. data/README.md +4 -0
  5. data/format_parser.gemspec +1 -0
  6. data/lib/archive.rb +3 -0
  7. data/lib/audio.rb +3 -0
  8. data/lib/document.rb +1 -0
  9. data/lib/format_parser.rb +18 -3
  10. data/lib/format_parser/version.rb +1 -1
  11. data/lib/image.rb +3 -0
  12. data/lib/parsers/aiff_parser.rb +4 -1
  13. data/lib/parsers/bmp_parser.rb +3 -0
  14. data/lib/parsers/cr2_parser.rb +2 -0
  15. data/lib/parsers/dpx_parser.rb +19 -8
  16. data/lib/parsers/flac_parser.rb +2 -0
  17. data/lib/parsers/gif_parser.rb +2 -0
  18. data/lib/parsers/jpeg_parser.rb +2 -0
  19. data/lib/parsers/m3u_parser.rb +23 -0
  20. data/lib/parsers/moov_parser.rb +10 -1
  21. data/lib/parsers/mp3_parser.rb +3 -2
  22. data/lib/parsers/ogg_parser.rb +3 -2
  23. data/lib/parsers/pdf_parser.rb +2 -2
  24. data/lib/parsers/png_parser.rb +2 -0
  25. data/lib/parsers/psd_parser.rb +2 -0
  26. data/lib/parsers/tiff_parser.rb +10 -2
  27. data/lib/parsers/wav_parser.rb +3 -0
  28. data/lib/parsers/zip_parser.rb +5 -3
  29. data/lib/parsers/zip_parser/office_formats.rb +5 -5
  30. data/lib/remote_io.rb +7 -1
  31. data/lib/text.rb +19 -0
  32. data/lib/video.rb +3 -0
  33. data/spec/format_parser_spec.rb +20 -0
  34. data/spec/parsers/aiff_parser_spec.rb +1 -0
  35. data/spec/parsers/bmp_parser_spec.rb +8 -0
  36. data/spec/parsers/cr2_parser_spec.rb +1 -0
  37. data/spec/parsers/dpx_parser_spec.rb +1 -0
  38. data/spec/parsers/flac_parser_spec.rb +1 -0
  39. data/spec/parsers/gif_parser_spec.rb +1 -0
  40. data/spec/parsers/jpeg_parser_spec.rb +1 -0
  41. data/spec/parsers/m3u_parser_spec.rb +41 -0
  42. data/spec/parsers/moov_parser_spec.rb +4 -1
  43. data/spec/parsers/mp3_parser_spec.rb +1 -0
  44. data/spec/parsers/ogg_parser_spec.rb +1 -0
  45. data/spec/parsers/pdf_parser_spec.rb +1 -0
  46. data/spec/parsers/png_parser_spec.rb +1 -0
  47. data/spec/parsers/psd_parser_spec.rb +1 -0
  48. data/spec/parsers/tiff_parser_spec.rb +1 -0
  49. data/spec/parsers/wav_parser_spec.rb +1 -0
  50. data/spec/parsers/zip_parser_spec.rb +2 -0
  51. data/spec/remote_fetching_spec.rb +11 -0
  52. data/spec/remote_io_spec.rb +38 -13
  53. metadata +21 -4
  54. data/.travis.yml +0 -12
@@ -23,6 +23,7 @@ describe FormatParser::MP3Parser do
23
23
 
24
24
  expect(parsed.nature).to eq(:audio)
25
25
  expect(parsed.format).to eq(:mp3)
26
+ expect(parsed.content_type).to eq('audio/mpeg')
26
27
  expect(parsed.num_audio_channels).to eq(2)
27
28
  expect(parsed.audio_sample_rate_hz).to eq(48000)
28
29
  expect(parsed.intrinsics).not_to be_nil
@@ -6,6 +6,7 @@ describe FormatParser::OggParser do
6
6
 
7
7
  expect(parse_result.nature).to eq(:audio)
8
8
  expect(parse_result.format).to eq(:ogg)
9
+ expect(parse_result.content_type).to eq('audio/ogg')
9
10
  expect(parse_result.num_audio_channels).to eq(1)
10
11
  expect(parse_result.audio_sample_rate_hz).to eq(16000)
11
12
  expect(parse_result.media_duration_seconds).to be_within(0.01).of(2973.95)
@@ -17,6 +17,7 @@ describe FormatParser::PDFParser do
17
17
  expect(parsed_pdf).not_to be_nil
18
18
  expect(parsed_pdf.nature).to eq(:document)
19
19
  expect(parsed_pdf.format).to eq(:pdf)
20
+ expect(parsed_pdf.content_type).to eq('application/pdf')
20
21
  end
21
22
  end
22
23
 
@@ -15,6 +15,7 @@ describe FormatParser::PNGParser do
15
15
 
16
16
  expect(parsed.height_px).to be_kind_of(Integer)
17
17
  expect(parsed.height_px).to be > 0
18
+ expect(parsed.content_type).to eq('image/png')
18
19
  end
19
20
  end
20
21
  end
@@ -15,6 +15,7 @@ describe FormatParser::PSDParser do
15
15
 
16
16
  expect(parsed.height_px).to be_kind_of(Integer)
17
17
  expect(parsed.height_px).to be > 0
18
+ expect(parsed.content_type).to eq('application/x-photoshop')
18
19
  end
19
20
  end
20
21
  end
@@ -59,6 +59,7 @@ describe FormatParser::TIFFParser do
59
59
  expect(parsed.width_px).to eq(7952)
60
60
  expect(parsed.height_px).to eq(5304)
61
61
  expect(parsed.intrinsics[:exif]).not_to be_nil
62
+ expect(parsed.content_type).to eq('image/x-sony-arw')
62
63
  end
63
64
 
64
65
  describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
@@ -9,6 +9,7 @@ describe FormatParser::WAVParser do
9
9
 
10
10
  expect(parse_result.nature).to eq(:audio)
11
11
  expect(parse_result.format).to eq(:wav)
12
+ expect(parse_result.content_type).to eq('audio/x-wav')
12
13
  end
13
14
  end
14
15
 
@@ -14,6 +14,7 @@ describe FormatParser::ZIPParser do
14
14
  expect(result).not_to be_nil
15
15
 
16
16
  expect(result.format).to eq(:zip)
17
+ expect(result.content_type).to eq('application/zip')
17
18
  expect(result.nature).to eq(:archive)
18
19
  expect(result.entries.length).to eq(0xFFFF + 1)
19
20
 
@@ -58,6 +59,7 @@ describe FormatParser::ZIPParser do
58
59
  result = subject.call(fi_io)
59
60
  expect(result.nature).to eq(:document)
60
61
  expect(result.format).to eq(:docx)
62
+ expect(result.content_type).to eq('application/vnd.openxmlformats-officedocument.wordprocessingml.document')
61
63
 
62
64
  fixture_path = fixtures_dir + '/ZIP/sample-docx.docx'
63
65
  fi_io = File.open(fixture_path, 'rb')
@@ -15,6 +15,10 @@ describe 'Fetching data from HTTP remotes' do
15
15
  }
16
16
  @server = WEBrick::HTTPServer.new(options)
17
17
  @server.mount '/', WEBrick::HTTPServlet::FileHandler, fixtures_dir
18
+ @server.mount_proc '/redirect' do |req, res|
19
+ res.status = 302
20
+ res.header['Location'] = req.path.sub('/redirect', '')
21
+ end
18
22
  trap('INT') { @server.stop }
19
23
  @server_thread = Thread.new { @server.start }
20
24
  end
@@ -91,6 +95,13 @@ describe 'Fetching data from HTTP remotes' do
91
95
  end
92
96
  end
93
97
 
98
+ context 'when the server responds with a redirect' do
99
+ it 'follows the redirect' do
100
+ file_information = FormatParser.parse_http('http://localhost:9399/redirect/TIFF/test.tif')
101
+ expect(file_information.format).to eq(:tif)
102
+ end
103
+ end
104
+
94
105
  after(:all) do
95
106
  @server.stop
96
107
  @server_thread.join(0.5)
@@ -7,7 +7,9 @@ describe FormatParser::RemoteIO do
7
7
  rio = described_class.new('https://images.invalid/img.jpg')
8
8
 
9
9
  fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 206, body: 'This is the response')
10
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109').and_return(fake_resp)
10
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
11
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
12
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
11
13
 
12
14
  rio.seek(10)
13
15
  read_result = rio.read(100)
@@ -18,7 +20,9 @@ describe FormatParser::RemoteIO do
18
20
  rio = described_class.new('https://images.invalid/img.jpg')
19
21
 
20
22
  fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 200, body: 'This is the response')
21
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109').and_return(fake_resp)
23
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
24
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
25
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
22
26
 
23
27
  rio.seek(10)
24
28
  read_result = rio.read(100)
@@ -29,7 +33,9 @@ describe FormatParser::RemoteIO do
29
33
  rio = described_class.new('https://images.invalid/img.jpg')
30
34
 
31
35
  fake_resp = double(headers: {}, status: 403, body: 'Please log in')
32
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
36
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
37
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
38
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
33
39
 
34
40
  rio.seek(100)
35
41
  expect { rio.read(100) }.to raise_error(/replied with a 403 and refused/)
@@ -39,7 +45,9 @@ describe FormatParser::RemoteIO do
39
45
  rio = described_class.new('https://images.invalid/img.jpg')
40
46
 
41
47
  fake_resp = double(headers: {}, status: 416, body: 'You stepped off the ledge of the range')
42
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
48
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
49
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
50
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
43
51
 
44
52
  rio.seek(100)
45
53
  expect(rio.read(100)).to be_nil
@@ -49,7 +57,9 @@ describe FormatParser::RemoteIO do
49
57
  rio = described_class.new('https://images.invalid/img.jpg')
50
58
 
51
59
  fake_resp = double(headers: {}, status: 403, body: 'Please log in')
52
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
60
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
61
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
62
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
53
63
 
54
64
  rio.seek(100)
55
65
  # rubocop: disable Lint/AmbiguousBlockAssociation
@@ -60,7 +70,9 @@ describe FormatParser::RemoteIO do
60
70
  rio = described_class.new('https://images.invalid/img.jpg')
61
71
 
62
72
  fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
63
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
73
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
74
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
75
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
64
76
 
65
77
  rio.seek(100)
66
78
  expect(rio.read(100)).to be_nil
@@ -69,15 +81,24 @@ describe FormatParser::RemoteIO do
69
81
  it 'does not overwrite size when the range cannot be satisfied and the response is 416' do
70
82
  rio = described_class.new('https://images.invalid/img.jpg')
71
83
 
72
- fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
73
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0').and_return(fake_resp)
84
+ fake_resp1 = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
85
+ fake_resp2 = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
86
+
87
+ faraday_conn = instance_double(Faraday::Connection)
88
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
89
+ expect(faraday_conn).to receive(:get)
90
+ .with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
91
+ .ordered
92
+ .and_return(fake_resp1)
93
+ expect(faraday_conn).to receive(:get)
94
+ .with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
95
+ .ordered
96
+ .and_return(fake_resp2)
97
+
74
98
  rio.read(1)
75
99
 
76
100
  expect(rio.size).to eq(13)
77
101
 
78
- fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
79
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
80
-
81
102
  rio.seek(100)
82
103
  expect(rio.read(100)).to be_nil
83
104
 
@@ -88,7 +109,9 @@ describe FormatParser::RemoteIO do
88
109
  rio = described_class.new('https://images.invalid/img.jpg')
89
110
 
90
111
  fake_resp = double(headers: {}, status: 502, body: 'Guru meditation')
91
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
112
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
113
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
114
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
92
115
 
93
116
  rio.seek(100)
94
117
  expect { rio.read(100) }.to raise_error(/replied with a 502 and we might want to retry/)
@@ -100,7 +123,9 @@ describe FormatParser::RemoteIO do
100
123
  expect(rio.pos).to eq(0)
101
124
 
102
125
  fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
103
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0').and_return(fake_resp)
126
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
127
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
128
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
104
129
  rio.read(1)
105
130
 
106
131
  expect(rio.pos).to eq(1)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.25.4
4
+ version: 0.28.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2020-12-09 00:00:00.000000000 Z
12
+ date: 2021-02-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -73,6 +73,20 @@ dependencies:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0.13'
76
+ - !ruby/object:Gem::Dependency
77
+ name: faraday_middleware
78
+ requirement: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.14'
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.14'
76
90
  - !ruby/object:Gem::Dependency
77
91
  name: measurometer
78
92
  requirement: !ruby/object:Gem::Requirement
@@ -183,10 +197,10 @@ executables:
183
197
  extensions: []
184
198
  extra_rdoc_files: []
185
199
  files:
200
+ - ".github/workflows/main.yml"
186
201
  - ".gitignore"
187
202
  - ".rspec"
188
203
  - ".rubocop.yml"
189
- - ".travis.yml"
190
204
  - CHANGELOG.md
191
205
  - CODE_OF_CONDUCT.md
192
206
  - CONTRIBUTING.md
@@ -219,6 +233,7 @@ files:
219
233
  - lib/parsers/flac_parser.rb
220
234
  - lib/parsers/gif_parser.rb
221
235
  - lib/parsers/jpeg_parser.rb
236
+ - lib/parsers/m3u_parser.rb
222
237
  - lib/parsers/moov_parser.rb
223
238
  - lib/parsers/moov_parser/decoder.rb
224
239
  - lib/parsers/mp3_parser.rb
@@ -236,6 +251,7 @@ files:
236
251
  - lib/read_limiter.rb
237
252
  - lib/read_limits_config.rb
238
253
  - lib/remote_io.rb
254
+ - lib/text.rb
239
255
  - lib/video.rb
240
256
  - spec/active_storage/blob_io_spec.rb
241
257
  - spec/active_storage/rails_app_spec.rb
@@ -257,6 +273,7 @@ files:
257
273
  - spec/parsers/flac_parser_spec.rb
258
274
  - spec/parsers/gif_parser_spec.rb
259
275
  - spec/parsers/jpeg_parser_spec.rb
276
+ - spec/parsers/m3u_parser_spec.rb
260
277
  - spec/parsers/moov_parser_spec.rb
261
278
  - spec/parsers/mp3_parser_spec.rb
262
279
  - spec/parsers/mpeg_parser_spec.rb
@@ -292,7 +309,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
292
309
  - !ruby/object:Gem::Version
293
310
  version: '0'
294
311
  requirements: []
295
- rubygems_version: 3.1.4
312
+ rubygems_version: 3.0.3
296
313
  signing_key:
297
314
  specification_version: 4
298
315
  summary: A library for efficient parsing of file metadata
data/.travis.yml DELETED
@@ -1,12 +0,0 @@
1
- rvm:
2
- - 2.2.10
3
- - 2.3.8
4
- - 2.4.9
5
- - 2.5.8
6
- - 2.6.6
7
- - 2.7.2
8
- - jruby
9
- sudo: false
10
- cache: bundler
11
- script:
12
- - bundle exec rake