format_parser 0.25.4 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +104 -0
  3. data/CHANGELOG.md +15 -0
  4. data/README.md +4 -0
  5. data/format_parser.gemspec +1 -0
  6. data/lib/archive.rb +3 -0
  7. data/lib/audio.rb +3 -0
  8. data/lib/document.rb +1 -0
  9. data/lib/format_parser.rb +18 -3
  10. data/lib/format_parser/version.rb +1 -1
  11. data/lib/image.rb +3 -0
  12. data/lib/parsers/aiff_parser.rb +4 -1
  13. data/lib/parsers/bmp_parser.rb +3 -0
  14. data/lib/parsers/cr2_parser.rb +2 -0
  15. data/lib/parsers/dpx_parser.rb +19 -8
  16. data/lib/parsers/flac_parser.rb +2 -0
  17. data/lib/parsers/gif_parser.rb +2 -0
  18. data/lib/parsers/jpeg_parser.rb +2 -0
  19. data/lib/parsers/m3u_parser.rb +23 -0
  20. data/lib/parsers/moov_parser.rb +10 -1
  21. data/lib/parsers/mp3_parser.rb +3 -2
  22. data/lib/parsers/ogg_parser.rb +3 -2
  23. data/lib/parsers/pdf_parser.rb +2 -2
  24. data/lib/parsers/png_parser.rb +2 -0
  25. data/lib/parsers/psd_parser.rb +2 -0
  26. data/lib/parsers/tiff_parser.rb +10 -2
  27. data/lib/parsers/wav_parser.rb +3 -0
  28. data/lib/parsers/zip_parser.rb +5 -3
  29. data/lib/parsers/zip_parser/office_formats.rb +5 -5
  30. data/lib/remote_io.rb +7 -1
  31. data/lib/text.rb +19 -0
  32. data/lib/video.rb +3 -0
  33. data/spec/format_parser_spec.rb +20 -0
  34. data/spec/parsers/aiff_parser_spec.rb +1 -0
  35. data/spec/parsers/bmp_parser_spec.rb +8 -0
  36. data/spec/parsers/cr2_parser_spec.rb +1 -0
  37. data/spec/parsers/dpx_parser_spec.rb +1 -0
  38. data/spec/parsers/flac_parser_spec.rb +1 -0
  39. data/spec/parsers/gif_parser_spec.rb +1 -0
  40. data/spec/parsers/jpeg_parser_spec.rb +1 -0
  41. data/spec/parsers/m3u_parser_spec.rb +41 -0
  42. data/spec/parsers/moov_parser_spec.rb +4 -1
  43. data/spec/parsers/mp3_parser_spec.rb +1 -0
  44. data/spec/parsers/ogg_parser_spec.rb +1 -0
  45. data/spec/parsers/pdf_parser_spec.rb +1 -0
  46. data/spec/parsers/png_parser_spec.rb +1 -0
  47. data/spec/parsers/psd_parser_spec.rb +1 -0
  48. data/spec/parsers/tiff_parser_spec.rb +1 -0
  49. data/spec/parsers/wav_parser_spec.rb +1 -0
  50. data/spec/parsers/zip_parser_spec.rb +2 -0
  51. data/spec/remote_fetching_spec.rb +11 -0
  52. data/spec/remote_io_spec.rb +38 -13
  53. metadata +21 -4
  54. data/.travis.yml +0 -12
@@ -23,6 +23,7 @@ describe FormatParser::MP3Parser do
23
23
 
24
24
  expect(parsed.nature).to eq(:audio)
25
25
  expect(parsed.format).to eq(:mp3)
26
+ expect(parsed.content_type).to eq('audio/mpeg')
26
27
  expect(parsed.num_audio_channels).to eq(2)
27
28
  expect(parsed.audio_sample_rate_hz).to eq(48000)
28
29
  expect(parsed.intrinsics).not_to be_nil
@@ -6,6 +6,7 @@ describe FormatParser::OggParser do
6
6
 
7
7
  expect(parse_result.nature).to eq(:audio)
8
8
  expect(parse_result.format).to eq(:ogg)
9
+ expect(parse_result.content_type).to eq('audio/ogg')
9
10
  expect(parse_result.num_audio_channels).to eq(1)
10
11
  expect(parse_result.audio_sample_rate_hz).to eq(16000)
11
12
  expect(parse_result.media_duration_seconds).to be_within(0.01).of(2973.95)
@@ -17,6 +17,7 @@ describe FormatParser::PDFParser do
17
17
  expect(parsed_pdf).not_to be_nil
18
18
  expect(parsed_pdf.nature).to eq(:document)
19
19
  expect(parsed_pdf.format).to eq(:pdf)
20
+ expect(parsed_pdf.content_type).to eq('application/pdf')
20
21
  end
21
22
  end
22
23
 
@@ -15,6 +15,7 @@ describe FormatParser::PNGParser do
15
15
 
16
16
  expect(parsed.height_px).to be_kind_of(Integer)
17
17
  expect(parsed.height_px).to be > 0
18
+ expect(parsed.content_type).to eq('image/png')
18
19
  end
19
20
  end
20
21
  end
@@ -15,6 +15,7 @@ describe FormatParser::PSDParser do
15
15
 
16
16
  expect(parsed.height_px).to be_kind_of(Integer)
17
17
  expect(parsed.height_px).to be > 0
18
+ expect(parsed.content_type).to eq('application/x-photoshop')
18
19
  end
19
20
  end
20
21
  end
@@ -59,6 +59,7 @@ describe FormatParser::TIFFParser do
59
59
  expect(parsed.width_px).to eq(7952)
60
60
  expect(parsed.height_px).to eq(5304)
61
61
  expect(parsed.intrinsics[:exif]).not_to be_nil
62
+ expect(parsed.content_type).to eq('image/x-sony-arw')
62
63
  end
63
64
 
64
65
  describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
@@ -9,6 +9,7 @@ describe FormatParser::WAVParser do
9
9
 
10
10
  expect(parse_result.nature).to eq(:audio)
11
11
  expect(parse_result.format).to eq(:wav)
12
+ expect(parse_result.content_type).to eq('audio/x-wav')
12
13
  end
13
14
  end
14
15
 
@@ -14,6 +14,7 @@ describe FormatParser::ZIPParser do
14
14
  expect(result).not_to be_nil
15
15
 
16
16
  expect(result.format).to eq(:zip)
17
+ expect(result.content_type).to eq('application/zip')
17
18
  expect(result.nature).to eq(:archive)
18
19
  expect(result.entries.length).to eq(0xFFFF + 1)
19
20
 
@@ -58,6 +59,7 @@ describe FormatParser::ZIPParser do
58
59
  result = subject.call(fi_io)
59
60
  expect(result.nature).to eq(:document)
60
61
  expect(result.format).to eq(:docx)
62
+ expect(result.content_type).to eq('application/vnd.openxmlformats-officedocument.wordprocessingml.document')
61
63
 
62
64
  fixture_path = fixtures_dir + '/ZIP/sample-docx.docx'
63
65
  fi_io = File.open(fixture_path, 'rb')
@@ -15,6 +15,10 @@ describe 'Fetching data from HTTP remotes' do
15
15
  }
16
16
  @server = WEBrick::HTTPServer.new(options)
17
17
  @server.mount '/', WEBrick::HTTPServlet::FileHandler, fixtures_dir
18
+ @server.mount_proc '/redirect' do |req, res|
19
+ res.status = 302
20
+ res.header['Location'] = req.path.sub('/redirect', '')
21
+ end
18
22
  trap('INT') { @server.stop }
19
23
  @server_thread = Thread.new { @server.start }
20
24
  end
@@ -91,6 +95,13 @@ describe 'Fetching data from HTTP remotes' do
91
95
  end
92
96
  end
93
97
 
98
+ context 'when the server responds with a redirect' do
99
+ it 'follows the redirect' do
100
+ file_information = FormatParser.parse_http('http://localhost:9399/redirect/TIFF/test.tif')
101
+ expect(file_information.format).to eq(:tif)
102
+ end
103
+ end
104
+
94
105
  after(:all) do
95
106
  @server.stop
96
107
  @server_thread.join(0.5)
@@ -7,7 +7,9 @@ describe FormatParser::RemoteIO do
7
7
  rio = described_class.new('https://images.invalid/img.jpg')
8
8
 
9
9
  fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 206, body: 'This is the response')
10
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109').and_return(fake_resp)
10
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
11
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
12
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
11
13
 
12
14
  rio.seek(10)
13
15
  read_result = rio.read(100)
@@ -18,7 +20,9 @@ describe FormatParser::RemoteIO do
18
20
  rio = described_class.new('https://images.invalid/img.jpg')
19
21
 
20
22
  fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 200, body: 'This is the response')
21
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109').and_return(fake_resp)
23
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
24
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
25
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
22
26
 
23
27
  rio.seek(10)
24
28
  read_result = rio.read(100)
@@ -29,7 +33,9 @@ describe FormatParser::RemoteIO do
29
33
  rio = described_class.new('https://images.invalid/img.jpg')
30
34
 
31
35
  fake_resp = double(headers: {}, status: 403, body: 'Please log in')
32
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
36
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
37
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
38
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
33
39
 
34
40
  rio.seek(100)
35
41
  expect { rio.read(100) }.to raise_error(/replied with a 403 and refused/)
@@ -39,7 +45,9 @@ describe FormatParser::RemoteIO do
39
45
  rio = described_class.new('https://images.invalid/img.jpg')
40
46
 
41
47
  fake_resp = double(headers: {}, status: 416, body: 'You stepped off the ledge of the range')
42
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
48
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
49
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
50
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
43
51
 
44
52
  rio.seek(100)
45
53
  expect(rio.read(100)).to be_nil
@@ -49,7 +57,9 @@ describe FormatParser::RemoteIO do
49
57
  rio = described_class.new('https://images.invalid/img.jpg')
50
58
 
51
59
  fake_resp = double(headers: {}, status: 403, body: 'Please log in')
52
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
60
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
61
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
62
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
53
63
 
54
64
  rio.seek(100)
55
65
  # rubocop: disable Lint/AmbiguousBlockAssociation
@@ -60,7 +70,9 @@ describe FormatParser::RemoteIO do
60
70
  rio = described_class.new('https://images.invalid/img.jpg')
61
71
 
62
72
  fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
63
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
73
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
74
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
75
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
64
76
 
65
77
  rio.seek(100)
66
78
  expect(rio.read(100)).to be_nil
@@ -69,15 +81,24 @@ describe FormatParser::RemoteIO do
69
81
  it 'does not overwrite size when the range cannot be satisfied and the response is 416' do
70
82
  rio = described_class.new('https://images.invalid/img.jpg')
71
83
 
72
- fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
73
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0').and_return(fake_resp)
84
+ fake_resp1 = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
85
+ fake_resp2 = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
86
+
87
+ faraday_conn = instance_double(Faraday::Connection)
88
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
89
+ expect(faraday_conn).to receive(:get)
90
+ .with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
91
+ .ordered
92
+ .and_return(fake_resp1)
93
+ expect(faraday_conn).to receive(:get)
94
+ .with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
95
+ .ordered
96
+ .and_return(fake_resp2)
97
+
74
98
  rio.read(1)
75
99
 
76
100
  expect(rio.size).to eq(13)
77
101
 
78
- fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
79
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
80
-
81
102
  rio.seek(100)
82
103
  expect(rio.read(100)).to be_nil
83
104
 
@@ -88,7 +109,9 @@ describe FormatParser::RemoteIO do
88
109
  rio = described_class.new('https://images.invalid/img.jpg')
89
110
 
90
111
  fake_resp = double(headers: {}, status: 502, body: 'Guru meditation')
91
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
112
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
113
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
114
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
92
115
 
93
116
  rio.seek(100)
94
117
  expect { rio.read(100) }.to raise_error(/replied with a 502 and we might want to retry/)
@@ -100,7 +123,9 @@ describe FormatParser::RemoteIO do
100
123
  expect(rio.pos).to eq(0)
101
124
 
102
125
  fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
103
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0').and_return(fake_resp)
126
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
127
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
128
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
104
129
  rio.read(1)
105
130
 
106
131
  expect(rio.pos).to eq(1)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.25.4
4
+ version: 0.28.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2020-12-09 00:00:00.000000000 Z
12
+ date: 2021-02-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -73,6 +73,20 @@ dependencies:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0.13'
76
+ - !ruby/object:Gem::Dependency
77
+ name: faraday_middleware
78
+ requirement: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.14'
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.14'
76
90
  - !ruby/object:Gem::Dependency
77
91
  name: measurometer
78
92
  requirement: !ruby/object:Gem::Requirement
@@ -183,10 +197,10 @@ executables:
183
197
  extensions: []
184
198
  extra_rdoc_files: []
185
199
  files:
200
+ - ".github/workflows/main.yml"
186
201
  - ".gitignore"
187
202
  - ".rspec"
188
203
  - ".rubocop.yml"
189
- - ".travis.yml"
190
204
  - CHANGELOG.md
191
205
  - CODE_OF_CONDUCT.md
192
206
  - CONTRIBUTING.md
@@ -219,6 +233,7 @@ files:
219
233
  - lib/parsers/flac_parser.rb
220
234
  - lib/parsers/gif_parser.rb
221
235
  - lib/parsers/jpeg_parser.rb
236
+ - lib/parsers/m3u_parser.rb
222
237
  - lib/parsers/moov_parser.rb
223
238
  - lib/parsers/moov_parser/decoder.rb
224
239
  - lib/parsers/mp3_parser.rb
@@ -236,6 +251,7 @@ files:
236
251
  - lib/read_limiter.rb
237
252
  - lib/read_limits_config.rb
238
253
  - lib/remote_io.rb
254
+ - lib/text.rb
239
255
  - lib/video.rb
240
256
  - spec/active_storage/blob_io_spec.rb
241
257
  - spec/active_storage/rails_app_spec.rb
@@ -257,6 +273,7 @@ files:
257
273
  - spec/parsers/flac_parser_spec.rb
258
274
  - spec/parsers/gif_parser_spec.rb
259
275
  - spec/parsers/jpeg_parser_spec.rb
276
+ - spec/parsers/m3u_parser_spec.rb
260
277
  - spec/parsers/moov_parser_spec.rb
261
278
  - spec/parsers/mp3_parser_spec.rb
262
279
  - spec/parsers/mpeg_parser_spec.rb
@@ -292,7 +309,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
292
309
  - !ruby/object:Gem::Version
293
310
  version: '0'
294
311
  requirements: []
295
- rubygems_version: 3.1.4
312
+ rubygems_version: 3.0.3
296
313
  signing_key:
297
314
  specification_version: 4
298
315
  summary: A library for efficient parsing of file metadata
data/.travis.yml DELETED
@@ -1,12 +0,0 @@
1
- rvm:
2
- - 2.2.10
3
- - 2.3.8
4
- - 2.4.9
5
- - 2.5.8
6
- - 2.6.6
7
- - 2.7.2
8
- - jruby
9
- sudo: false
10
- cache: bundler
11
- script:
12
- - bundle exec rake