format_parser 0.26.0 → 0.29.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +104 -0
- data/CHANGELOG.md +12 -0
- data/format_parser.gemspec +1 -0
- data/lib/archive.rb +3 -0
- data/lib/audio.rb +3 -0
- data/lib/document.rb +1 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/format_parser.rb +3 -2
- data/lib/image.rb +3 -0
- data/lib/parsers/aiff_parser.rb +4 -1
- data/lib/parsers/bmp_parser.rb +3 -0
- data/lib/parsers/cr2_parser.rb +2 -0
- data/lib/parsers/dpx_parser.rb +6 -0
- data/lib/parsers/flac_parser.rb +2 -0
- data/lib/parsers/gif_parser.rb +2 -0
- data/lib/parsers/jpeg_parser.rb +2 -0
- data/lib/parsers/m3u_parser.rb +3 -1
- data/lib/parsers/moov_parser.rb +10 -1
- data/lib/parsers/mp3_parser.rb +3 -2
- data/lib/parsers/ogg_parser.rb +3 -2
- data/lib/parsers/pdf_parser.rb +2 -2
- data/lib/parsers/png_parser.rb +2 -0
- data/lib/parsers/psd_parser.rb +2 -0
- data/lib/parsers/tiff_parser.rb +10 -2
- data/lib/parsers/wav_parser.rb +3 -0
- data/lib/parsers/zip_parser/office_formats.rb +5 -5
- data/lib/parsers/zip_parser.rb +5 -3
- data/lib/remote_io.rb +29 -7
- data/lib/text.rb +1 -0
- data/lib/video.rb +3 -0
- data/spec/parsers/aiff_parser_spec.rb +1 -0
- data/spec/parsers/bmp_parser_spec.rb +8 -0
- data/spec/parsers/cr2_parser_spec.rb +1 -0
- data/spec/parsers/dpx_parser_spec.rb +1 -0
- data/spec/parsers/flac_parser_spec.rb +1 -0
- data/spec/parsers/gif_parser_spec.rb +1 -0
- data/spec/parsers/jpeg_parser_spec.rb +1 -0
- data/spec/parsers/m3u_parser_spec.rb +1 -0
- data/spec/parsers/moov_parser_spec.rb +4 -1
- data/spec/parsers/mp3_parser_spec.rb +1 -0
- data/spec/parsers/ogg_parser_spec.rb +1 -0
- data/spec/parsers/pdf_parser_spec.rb +1 -0
- data/spec/parsers/png_parser_spec.rb +1 -0
- data/spec/parsers/psd_parser_spec.rb +1 -0
- data/spec/parsers/tiff_parser_spec.rb +1 -0
- data/spec/parsers/wav_parser_spec.rb +1 -0
- data/spec/parsers/zip_parser_spec.rb +2 -0
- data/spec/remote_fetching_spec.rb +53 -2
- data/spec/remote_io_spec.rb +38 -13
- metadata +17 -3
- data/.travis.yml +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75ee83f55840e3031d4d60d8dc07ca038812188613e2b740079e1c965efb2886
|
4
|
+
data.tar.gz: 31c3ee84434560c18e6ea74a23160b909e6f880f52b2ed6f0e888e847c557bd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 536cfb1bac7926f56ba760959d7c5a0905d3b2b0944b16248b6c81be00b722dad894a8d6d5773134fb0471e42d016a20be15e3d5a01c671f0cc65658f2fc05b4
|
7
|
+
data.tar.gz: cb3f73df051b8612cb6d0e1a4e55c045e461bf2c5e4667dd6e461e779b1f39d01be8d70d084e252e5198a966ac590129286811f5822808a0c980c2ad72a087a1
|
@@ -0,0 +1,104 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push,pull_request]
|
4
|
+
|
5
|
+
env:
|
6
|
+
BUNDLE_PATH: vendor/bundle
|
7
|
+
|
8
|
+
jobs:
|
9
|
+
lint:
|
10
|
+
name: Code Style
|
11
|
+
runs-on: ubuntu-18.04
|
12
|
+
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
|
13
|
+
strategy:
|
14
|
+
matrix:
|
15
|
+
ruby:
|
16
|
+
- 2.7
|
17
|
+
- 2.6
|
18
|
+
- 2.5
|
19
|
+
- 2.4
|
20
|
+
- 2.3
|
21
|
+
- 2.2
|
22
|
+
- jruby
|
23
|
+
steps:
|
24
|
+
- name: Checkout
|
25
|
+
uses: actions/checkout@v2
|
26
|
+
- name: Setup Ruby
|
27
|
+
uses: ruby/setup-ruby@v1
|
28
|
+
with:
|
29
|
+
ruby-version: ${{ matrix.ruby }}
|
30
|
+
- name: Gemfile Cache
|
31
|
+
uses: actions/cache@v2
|
32
|
+
with:
|
33
|
+
path: Gemfile.lock
|
34
|
+
key: ${{ runner.os }}-gemlock-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'format_parser.gemspec') }}
|
35
|
+
restore-keys: |
|
36
|
+
${{ runner.os }}-gemlock-${{ matrix.ruby }}-
|
37
|
+
- name: Bundle Cache
|
38
|
+
id: cache-gems
|
39
|
+
uses: actions/cache@v2
|
40
|
+
with:
|
41
|
+
path: vendor/bundle
|
42
|
+
key: ${{ runner.os }}-gems-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'Gemfile.lock', 'format_parser.gemspec') }}
|
43
|
+
restore-keys: |
|
44
|
+
${{ runner.os }}-gems-${{ matrix.ruby }}-
|
45
|
+
${{ runner.os }}-gems-
|
46
|
+
- name: Bundle Install
|
47
|
+
if: steps.cache-gems.outputs.cache-hit != 'true'
|
48
|
+
run: bundle install --jobs 4 --retry 3
|
49
|
+
- name: Rubocop Cache
|
50
|
+
uses: actions/cache@v2
|
51
|
+
with:
|
52
|
+
path: ~/.cache/rubocop_cache
|
53
|
+
key: ${{ runner.os }}-rubocop-${{ hashFiles('.rubocop.yml') }}
|
54
|
+
restore-keys: |
|
55
|
+
${{ runner.os }}-rubocop-
|
56
|
+
- name: Rubocop
|
57
|
+
run: bundle exec rubocop
|
58
|
+
test:
|
59
|
+
name: Specs
|
60
|
+
runs-on: ubuntu-18.04
|
61
|
+
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
|
62
|
+
strategy:
|
63
|
+
matrix:
|
64
|
+
ruby:
|
65
|
+
- 2.7
|
66
|
+
- 2.6
|
67
|
+
- 2.5
|
68
|
+
- 2.4
|
69
|
+
- 2.3
|
70
|
+
- 2.2
|
71
|
+
- jruby
|
72
|
+
experimental: [false]
|
73
|
+
include:
|
74
|
+
- ruby: 3.0
|
75
|
+
experimental: true
|
76
|
+
steps:
|
77
|
+
- name: Checkout
|
78
|
+
uses: actions/checkout@v2
|
79
|
+
- name: Setup Ruby
|
80
|
+
uses: ruby/setup-ruby@v1
|
81
|
+
with:
|
82
|
+
ruby-version: ${{ matrix.ruby }}
|
83
|
+
- name: Gemfile Cache
|
84
|
+
uses: actions/cache@v2
|
85
|
+
with:
|
86
|
+
path: Gemfile.lock
|
87
|
+
key: ${{ runner.os }}-gemlock-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'format_parser.gemspec') }}
|
88
|
+
restore-keys: |
|
89
|
+
${{ runner.os }}-gemlock-${{ matrix.ruby }}-
|
90
|
+
- name: Bundle Cache
|
91
|
+
id: cache-gems
|
92
|
+
uses: actions/cache@v2
|
93
|
+
with:
|
94
|
+
path: vendor/bundle
|
95
|
+
key: ${{ runner.os }}-gems-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'Gemfile.lock', 'format_parser.gemspec') }}
|
96
|
+
restore-keys: |
|
97
|
+
${{ runner.os }}-gems-${{ matrix.ruby }}-
|
98
|
+
${{ runner.os }}-gems-
|
99
|
+
- name: Bundle Install
|
100
|
+
if: steps.cache-gems.outputs.cache-hit != 'true'
|
101
|
+
run: bundle install --jobs 4 --retry 3
|
102
|
+
- name: RSpec
|
103
|
+
continue-on-error: ${{ matrix.experimental }}
|
104
|
+
run: bundle exec rake parallel:spec
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
## 0.29.1
|
2
|
+
* Fix handling of 200 responses with `parse_http` as well as handling of very small responses which do not need range access
|
3
|
+
|
4
|
+
## 0.29.0
|
5
|
+
* Add option `headers:` to `FormatParser.parse_http`
|
6
|
+
|
7
|
+
## 0.28.0
|
8
|
+
* Change `FormatParser.parse_http` to follow HTTP redirects
|
9
|
+
|
10
|
+
## 0.27.0
|
11
|
+
* Add `#content_type` on `Result` return values which makes sense for the detected filetype
|
12
|
+
|
1
13
|
## 0.26.0
|
2
14
|
* Add support for M3U format files
|
3
15
|
|
data/format_parser.gemspec
CHANGED
@@ -34,6 +34,7 @@ Gem::Specification.new do |spec|
|
|
34
34
|
spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
|
35
35
|
spec.add_dependency 'id3tag', '~> 0.14'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
|
+
spec.add_dependency 'faraday_middleware', '~> 0.14'
|
37
38
|
spec.add_dependency 'measurometer', '~> 1'
|
38
39
|
|
39
40
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
data/lib/archive.rb
CHANGED
@@ -26,6 +26,9 @@ module FormatParser
|
|
26
26
|
# it can be placed here
|
27
27
|
attr_accessor :intrinsics
|
28
28
|
|
29
|
+
# The MIME type of the archive
|
30
|
+
attr_accessor :content_type
|
31
|
+
|
29
32
|
# Only permits assignments via defined accessors
|
30
33
|
def initialize(**attributes)
|
31
34
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
data/lib/audio.rb
CHANGED
@@ -35,6 +35,9 @@ module FormatParser
|
|
35
35
|
# it can be placed here
|
36
36
|
attr_accessor :intrinsics
|
37
37
|
|
38
|
+
# The MIME type of the sound file
|
39
|
+
attr_accessor :content_type
|
40
|
+
|
38
41
|
# Only permits assignments via defined accessors
|
39
42
|
def initialize(**attributes)
|
40
43
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
data/lib/document.rb
CHANGED
data/lib/format_parser.rb
CHANGED
@@ -88,13 +88,14 @@ module FormatParser
|
|
88
88
|
# given to `.parse`. The accepted keyword arguments are the same as the ones for `parse`.
|
89
89
|
#
|
90
90
|
# @param url[String, URI] the HTTP(S) URL to request the object from using Faraday and `Range:` requests
|
91
|
+
# @param headers[Hash] (optional) additional HTTP headers to send with the Faraday requests for the object
|
91
92
|
# @param kwargs the keyword arguments to be delegated to `.parse`
|
92
93
|
# @see {.parse}
|
93
|
-
def self.parse_http(url, **kwargs)
|
94
|
+
def self.parse_http(url, headers: {}, **kwargs)
|
94
95
|
# Do not extract the filename, since the URL
|
95
96
|
# can really be "anything". But if the caller
|
96
97
|
# provides filename_hint it will be carried over
|
97
|
-
parse(RemoteIO.new(url), **kwargs)
|
98
|
+
parse(RemoteIO.new(url, headers: headers), **kwargs)
|
98
99
|
end
|
99
100
|
|
100
101
|
# Parses the file at the given `path` and returns the results as if it were any IO
|
data/lib/image.rb
CHANGED
@@ -64,6 +64,9 @@ module FormatParser
|
|
64
64
|
# it can be placed here
|
65
65
|
attr_accessor :intrinsics
|
66
66
|
|
67
|
+
# The MIME type of the image file
|
68
|
+
attr_accessor :content_type
|
69
|
+
|
67
70
|
# Only permits assignments via defined accessors
|
68
71
|
def initialize(**attributes)
|
69
72
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
data/lib/parsers/aiff_parser.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
class FormatParser::AIFFParser
|
2
2
|
include FormatParser::IOUtils
|
3
3
|
|
4
|
+
AIFF_MIME_TYPE = 'audio/x-aiff'
|
5
|
+
|
4
6
|
# Known chunk types we can omit when parsing,
|
5
7
|
# grossly lifted from http://www.muratnkonar.com/aiff/
|
6
8
|
KNOWN_CHUNKS = [
|
@@ -70,7 +72,8 @@ class FormatParser::AIFFParser
|
|
70
72
|
num_audio_channels: channels,
|
71
73
|
audio_sample_rate_hz: sample_rate.to_i,
|
72
74
|
media_duration_frames: sample_frames,
|
73
|
-
media_duration_seconds: duration_in_seconds
|
75
|
+
media_duration_seconds: duration_in_seconds,
|
76
|
+
content_type: AIFF_MIME_TYPE,
|
74
77
|
)
|
75
78
|
end
|
76
79
|
|
data/lib/parsers/bmp_parser.rb
CHANGED
@@ -5,6 +5,7 @@ class FormatParser::BMPParser
|
|
5
5
|
|
6
6
|
VALID_BMP = 'BM'
|
7
7
|
PERMISSIBLE_PIXEL_ARRAY_LOCATIONS = 26..512
|
8
|
+
BMP_MIME_TYPE = 'image/bmp'
|
8
9
|
|
9
10
|
def likely_match?(filename)
|
10
11
|
filename =~ /\.bmp$/i
|
@@ -42,6 +43,7 @@ class FormatParser::BMPParser
|
|
42
43
|
width_px: width,
|
43
44
|
height_px: height,
|
44
45
|
color_mode: :rgb,
|
46
|
+
content_type: BMP_MIME_TYPE,
|
45
47
|
intrinsics: {
|
46
48
|
data_order: data_order,
|
47
49
|
bits_per_pixel: bit_depth
|
@@ -63,6 +65,7 @@ class FormatParser::BMPParser
|
|
63
65
|
width_px: width,
|
64
66
|
height_px: height.abs,
|
65
67
|
color_mode: :rgb,
|
68
|
+
content_type: BMP_MIME_TYPE,
|
66
69
|
intrinsics: {
|
67
70
|
vertical_resolution: vertical_res,
|
68
71
|
horizontal_resolution: horizontal_res,
|
data/lib/parsers/cr2_parser.rb
CHANGED
@@ -6,6 +6,7 @@ class FormatParser::CR2Parser
|
|
6
6
|
|
7
7
|
TIFF_HEADER = [0x49, 0x49, 0x2a, 0x00]
|
8
8
|
CR2_HEADER = [0x43, 0x52, 0x02, 0x00]
|
9
|
+
CR2_MIME_TYPE = 'image/x-canon-cr2'
|
9
10
|
|
10
11
|
def likely_match?(filename)
|
11
12
|
filename =~ /\.cr2$/i
|
@@ -39,6 +40,7 @@ class FormatParser::CR2Parser
|
|
39
40
|
display_height_px: exif_data.rotated? ? w : h,
|
40
41
|
orientation: exif_data.orientation_sym,
|
41
42
|
intrinsics: {exif: exif_data},
|
43
|
+
content_type: CR2_MIME_TYPE,
|
42
44
|
)
|
43
45
|
rescue EXIFR::MalformedTIFF
|
44
46
|
nil
|
data/lib/parsers/dpx_parser.rb
CHANGED
@@ -6,6 +6,11 @@ class FormatParser::DPXParser
|
|
6
6
|
BE_MAGIC = 'SDPX'
|
7
7
|
LE_MAGIC = BE_MAGIC.reverse
|
8
8
|
|
9
|
+
# There is no official MIME type for DPX, so we have
|
10
|
+
# to invent something useful. We will prefix it with x-
|
11
|
+
# to indicate that it is a vendor subtype
|
12
|
+
DPX_MIME_TYPE = 'image/x-dpx'
|
13
|
+
|
9
14
|
class ByteOrderHintIO < SimpleDelegator
|
10
15
|
def initialize(io, is_little_endian)
|
11
16
|
super(io)
|
@@ -61,6 +66,7 @@ class FormatParser::DPXParser
|
|
61
66
|
display_width_px: display_w,
|
62
67
|
display_height_px: display_h,
|
63
68
|
intrinsics: dpx_structure,
|
69
|
+
content_type: DPX_MIME_TYPE,
|
64
70
|
)
|
65
71
|
end
|
66
72
|
|
data/lib/parsers/flac_parser.rb
CHANGED
@@ -4,6 +4,7 @@ class FormatParser::FLACParser
|
|
4
4
|
MAGIC_BYTES = 4
|
5
5
|
MAGIC_BYTE_STRING = 'fLaC'
|
6
6
|
BLOCK_HEADER_BYTES = 4
|
7
|
+
FLAC_MIME_TYPE = 'audio/x-flac'
|
7
8
|
|
8
9
|
def likely_match?(filename)
|
9
10
|
filename =~ /\.flac$/i
|
@@ -61,6 +62,7 @@ class FormatParser::FLACParser
|
|
61
62
|
audio_sample_rate_hz: sample_rate,
|
62
63
|
media_duration_seconds: duration,
|
63
64
|
media_duration_frames: total_samples,
|
65
|
+
content_type: FLAC_MIME_TYPE,
|
64
66
|
intrinsics: {
|
65
67
|
bits_per_sample: bits_per_sample,
|
66
68
|
minimum_frame_size: minimum_frame_size,
|
data/lib/parsers/gif_parser.rb
CHANGED
@@ -3,6 +3,7 @@ class FormatParser::GIFParser
|
|
3
3
|
|
4
4
|
HEADERS = ['GIF87a', 'GIF89a'].map(&:b)
|
5
5
|
NETSCAPE_AND_AUTHENTICATION_CODE = 'NETSCAPE2.0'
|
6
|
+
GIF_MIME_TYPE = 'image/gif'
|
6
7
|
|
7
8
|
def likely_match?(filename)
|
8
9
|
filename =~ /\.gif$/i
|
@@ -45,6 +46,7 @@ class FormatParser::GIFParser
|
|
45
46
|
height_px: h,
|
46
47
|
has_multiple_frames: is_animated,
|
47
48
|
color_mode: :indexed,
|
49
|
+
content_type: GIF_MIME_TYPE
|
48
50
|
)
|
49
51
|
end
|
50
52
|
|
data/lib/parsers/jpeg_parser.rb
CHANGED
@@ -12,6 +12,7 @@ class FormatParser::JPEGParser
|
|
12
12
|
APP1_MARKER = 0xE1 # maybe EXIF
|
13
13
|
EXIF_MAGIC_STRING = "Exif\0\0".b
|
14
14
|
MUST_FIND_NEXT_MARKER_WITHIN_BYTES = 1024
|
15
|
+
JPEG_MIME_TYPE = 'image/jpeg'
|
15
16
|
|
16
17
|
def self.likely_match?(filename)
|
17
18
|
filename =~ /\.jpe?g$/i
|
@@ -88,6 +89,7 @@ class FormatParser::JPEGParser
|
|
88
89
|
display_height_px: dh,
|
89
90
|
orientation: flat_exif.orientation_sym,
|
90
91
|
intrinsics: {exif: flat_exif},
|
92
|
+
content_type: JPEG_MIME_TYPE
|
91
93
|
)
|
92
94
|
|
93
95
|
return result
|
data/lib/parsers/m3u_parser.rb
CHANGED
@@ -2,6 +2,7 @@ class FormatParser::M3UParser
|
|
2
2
|
include FormatParser::IOUtils
|
3
3
|
|
4
4
|
HEADER = '#EXTM3U'
|
5
|
+
M3U8_MIME_TYPE = 'application/vnd.apple.mpegurl' # https://en.wikipedia.org/wiki/M3U#Internet_media_types
|
5
6
|
|
6
7
|
def likely_match?(filename)
|
7
8
|
filename =~ /\.m3u8?$/i
|
@@ -14,7 +15,8 @@ class FormatParser::M3UParser
|
|
14
15
|
return unless HEADER.eql?(header)
|
15
16
|
|
16
17
|
FormatParser::Text.new(
|
17
|
-
format: :m3u
|
18
|
+
format: :m3u,
|
19
|
+
content_type: M3U8_MIME_TYPE,
|
18
20
|
)
|
19
21
|
end
|
20
22
|
FormatParser.register_parser new, natures: :text, formats: :m3u
|
data/lib/parsers/moov_parser.rb
CHANGED
@@ -11,6 +11,12 @@ class FormatParser::MOOVParser
|
|
11
11
|
'm4a ' => :m4a,
|
12
12
|
}
|
13
13
|
|
14
|
+
# https://tools.ietf.org/html/rfc4337#section-2
|
15
|
+
# There is also video/quicktime which we should be able to capture
|
16
|
+
# here, but there is currently no detection for MOVs versus MP4s
|
17
|
+
MP4_AU_MIME_TYPE = 'audio/mp4'
|
18
|
+
MP4_MIXED_MIME_TYPE = 'video/mp4'
|
19
|
+
|
14
20
|
def likely_match?(filename)
|
15
21
|
filename =~ /\.(mov|m4a|ma4|mp4|aac|m4v)$/i
|
16
22
|
end
|
@@ -49,10 +55,12 @@ class FormatParser::MOOVParser
|
|
49
55
|
end
|
50
56
|
|
51
57
|
# M4A only contains audio, while MP4 and friends can contain video.
|
52
|
-
|
58
|
+
fmt = format_from_moov_type(file_type)
|
59
|
+
if fmt == :m4a
|
53
60
|
FormatParser::Audio.new(
|
54
61
|
format: format_from_moov_type(file_type),
|
55
62
|
media_duration_seconds: media_duration_s,
|
63
|
+
content_type: MP4_AU_MIME_TYPE,
|
56
64
|
intrinsics: atom_tree,
|
57
65
|
)
|
58
66
|
else
|
@@ -61,6 +69,7 @@ class FormatParser::MOOVParser
|
|
61
69
|
width_px: width,
|
62
70
|
height_px: height,
|
63
71
|
media_duration_seconds: media_duration_s,
|
72
|
+
content_type: MP4_MIXED_MIME_TYPE,
|
64
73
|
intrinsics: atom_tree,
|
65
74
|
)
|
66
75
|
end
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -32,7 +32,7 @@ class FormatParser::MP3Parser
|
|
32
32
|
MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
|
33
33
|
MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
|
34
34
|
TIFF_HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
|
35
|
-
|
35
|
+
MP3_MIME_TYPE = 'audio/mpeg'
|
36
36
|
# Wraps the Tag object returned by ID3Tag in such
|
37
37
|
# a way that a usable JSON representation gets
|
38
38
|
# returned
|
@@ -104,7 +104,8 @@ class FormatParser::MP3Parser
|
|
104
104
|
# do not tell anything of substance
|
105
105
|
num_audio_channels: first_frame.channels,
|
106
106
|
audio_sample_rate_hz: first_frame.sample_rate,
|
107
|
-
intrinsics: id3tags_hash.merge(id3tags: tags)
|
107
|
+
intrinsics: id3tags_hash.merge(id3tags: tags),
|
108
|
+
content_type: MP3_MIME_TYPE,
|
108
109
|
)
|
109
110
|
|
110
111
|
extra_file_attirbutes = fetch_extra_attributes_from_id3_tags(id3tags_hash)
|
data/lib/parsers/ogg_parser.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
class FormatParser::OggParser
|
4
4
|
include FormatParser::IOUtils
|
5
5
|
|
6
|
-
# Maximum size of an Ogg page
|
7
6
|
MAX_POSSIBLE_PAGE_SIZE = 65307
|
7
|
+
OGG_MIME_TYPE = 'audio/ogg'
|
8
8
|
|
9
9
|
def likely_match?(filename)
|
10
10
|
filename =~ /\.ogg$/i
|
@@ -45,7 +45,8 @@ class FormatParser::OggParser
|
|
45
45
|
format: :ogg,
|
46
46
|
audio_sample_rate_hz: sample_rate,
|
47
47
|
num_audio_channels: channels,
|
48
|
-
media_duration_seconds: duration
|
48
|
+
media_duration_seconds: duration,
|
49
|
+
content_type: OGG_MIME_TYPE,
|
49
50
|
)
|
50
51
|
end
|
51
52
|
|
data/lib/parsers/pdf_parser.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
class FormatParser::PDFParser
|
2
2
|
include FormatParser::IOUtils
|
3
|
-
|
4
3
|
# First 9 bytes of a PDF should be in this format, according to:
|
5
4
|
#
|
6
5
|
# https://stackoverflow.com/questions/3108201/detect-if-pdf-file-is-correct-header-pdf
|
@@ -8,6 +7,7 @@ class FormatParser::PDFParser
|
|
8
7
|
# There are however exceptions, which are left out for now.
|
9
8
|
#
|
10
9
|
PDF_MARKER = /%PDF-1\.[0-8]{1}/
|
10
|
+
PDF_CONTENT_TYPE = 'application/pdf'
|
11
11
|
|
12
12
|
def likely_match?(filename)
|
13
13
|
filename =~ /\.(pdf|ai)$/i
|
@@ -18,7 +18,7 @@ class FormatParser::PDFParser
|
|
18
18
|
|
19
19
|
return unless safe_read(io, 9) =~ PDF_MARKER
|
20
20
|
|
21
|
-
FormatParser::Document.new(format: :pdf)
|
21
|
+
FormatParser::Document.new(format: :pdf, content_type: PDF_CONTENT_TYPE)
|
22
22
|
end
|
23
23
|
|
24
24
|
FormatParser.register_parser new, natures: :document, formats: :pdf, priority: 1
|
data/lib/parsers/png_parser.rb
CHANGED
@@ -14,6 +14,7 @@ class FormatParser::PNGParser
|
|
14
14
|
4 => true, # Grayscale with alpha
|
15
15
|
6 => true,
|
16
16
|
}
|
17
|
+
PNG_MIME_TYPE = 'image/png'
|
17
18
|
|
18
19
|
def likely_match?(filename)
|
19
20
|
filename =~ /\.png$/i
|
@@ -67,6 +68,7 @@ class FormatParser::PNGParser
|
|
67
68
|
color_mode: color_mode,
|
68
69
|
has_multiple_frames: has_animation,
|
69
70
|
num_animation_or_video_frames: num_frames,
|
71
|
+
content_type: PNG_MIME_TYPE,
|
70
72
|
)
|
71
73
|
end
|
72
74
|
|
data/lib/parsers/psd_parser.rb
CHANGED
@@ -2,6 +2,7 @@ class FormatParser::PSDParser
|
|
2
2
|
include FormatParser::IOUtils
|
3
3
|
|
4
4
|
PSD_HEADER = [0x38, 0x42, 0x50, 0x53]
|
5
|
+
PSD_MIME_TYPE = 'application/x-photoshop'
|
5
6
|
|
6
7
|
def likely_match?(filename)
|
7
8
|
filename =~ /\.psd$/i # Maybe also PSB at some point
|
@@ -20,6 +21,7 @@ class FormatParser::PSDParser
|
|
20
21
|
format: :psd,
|
21
22
|
width_px: w,
|
22
23
|
height_px: h,
|
24
|
+
content_type: PSD_MIME_TYPE,
|
23
25
|
)
|
24
26
|
end
|
25
27
|
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -5,6 +5,8 @@ class FormatParser::TIFFParser
|
|
5
5
|
MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
|
6
6
|
MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
|
7
7
|
HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
|
8
|
+
TIFF_MIME_TYPE = 'image/tiff'
|
9
|
+
ARW_MIME_TYPE = 'image/x-sony-arw'
|
8
10
|
|
9
11
|
def likely_match?(filename)
|
10
12
|
filename =~ /\.tiff?$/i
|
@@ -14,7 +16,10 @@ class FormatParser::TIFFParser
|
|
14
16
|
io = FormatParser::IOConstraint.new(io)
|
15
17
|
|
16
18
|
return unless HEADER_BYTES.include?(safe_read(io, 4))
|
17
|
-
|
19
|
+
|
20
|
+
# Skip over the offset of the IFD,
|
21
|
+
# EXIFR will re-read it anyway
|
22
|
+
io.seek(io.pos + 2)
|
18
23
|
return if cr2?(io)
|
19
24
|
|
20
25
|
# The TIFF scanner in EXIFR is plenty good enough,
|
@@ -26,14 +31,17 @@ class FormatParser::TIFFParser
|
|
26
31
|
w = exif_data.width || exif_data.pixel_x_dimension
|
27
32
|
h = exif_data.height || exif_data.pixel_y_dimension
|
28
33
|
|
34
|
+
format = arw?(exif_data) ? :arw : :tif
|
35
|
+
mime_type = arw?(exif_data) ? ARW_MIME_TYPE : TIFF_MIME_TYPE
|
29
36
|
FormatParser::Image.new(
|
30
|
-
format:
|
37
|
+
format: format,
|
31
38
|
width_px: w,
|
32
39
|
height_px: h,
|
33
40
|
display_width_px: exif_data.rotated? ? h : w,
|
34
41
|
display_height_px: exif_data.rotated? ? w : h,
|
35
42
|
orientation: exif_data.orientation_sym,
|
36
43
|
intrinsics: {exif: exif_data},
|
44
|
+
content_type: mime_type,
|
37
45
|
)
|
38
46
|
rescue EXIFR::MalformedTIFF
|
39
47
|
nil
|
data/lib/parsers/wav_parser.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
class FormatParser::WAVParser
|
2
2
|
include FormatParser::IOUtils
|
3
3
|
|
4
|
+
WAV_MIME_TYPE = 'audio/x-wav'
|
5
|
+
|
4
6
|
def likely_match?(filename)
|
5
7
|
filename =~ /\.wav$/i
|
6
8
|
end
|
@@ -96,6 +98,7 @@ class FormatParser::WAVParser
|
|
96
98
|
audio_sample_rate_hz: fmt_data[:sample_rate],
|
97
99
|
media_duration_frames: sample_frames,
|
98
100
|
media_duration_seconds: duration_in_seconds,
|
101
|
+
content_type: WAV_MIME_TYPE,
|
99
102
|
)
|
100
103
|
end
|
101
104
|
|
@@ -37,15 +37,15 @@ module FormatParser::ZIPParser::OfficeFormats
|
|
37
37
|
OFFICE_MARKER_FILES.subset?(filenames_set)
|
38
38
|
end
|
39
39
|
|
40
|
-
def
|
40
|
+
def office_file_format_and_mime_type_from_entry_set(filenames_set)
|
41
41
|
if filenames_set.include?('word/document.xml')
|
42
|
-
:docx
|
42
|
+
[:docx, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document']
|
43
43
|
elsif filenames_set.include?('xl/workbook.xml')
|
44
|
-
:xlsx
|
44
|
+
[:xlsx, 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet']
|
45
45
|
elsif filenames_set.include?('ppt/presentation.xml')
|
46
|
-
:pptx
|
46
|
+
[:pptx, 'application/vnd.openxmlformats-officedocument.presentationml.presentation']
|
47
47
|
else
|
48
|
-
:unknown
|
48
|
+
[:unknown, 'application/zip']
|
49
49
|
end
|
50
50
|
end
|
51
51
|
end
|
data/lib/parsers/zip_parser.rb
CHANGED
@@ -5,6 +5,8 @@ class FormatParser::ZIPParser
|
|
5
5
|
include OfficeFormats
|
6
6
|
include FormatParser::IOUtils
|
7
7
|
|
8
|
+
ZIP_MIME_TYPE = 'application/zip'
|
9
|
+
|
8
10
|
def likely_match?(filename)
|
9
11
|
filename =~ /\.(zip|docx|keynote|numbers|pptx|xlsx)$/i
|
10
12
|
end
|
@@ -25,10 +27,10 @@ class FormatParser::ZIPParser
|
|
25
27
|
end
|
26
28
|
|
27
29
|
if office_document?(filenames_set)
|
28
|
-
office_format =
|
29
|
-
FormatParser::Archive.new(nature: :document, format: office_format, entries: entries_archive)
|
30
|
+
office_format, mime_type = office_file_format_and_mime_type_from_entry_set(filenames_set)
|
31
|
+
FormatParser::Archive.new(nature: :document, format: office_format, entries: entries_archive, content_type: mime_type)
|
30
32
|
else
|
31
|
-
FormatParser::Archive.new(nature: :archive, format: :zip, entries: entries_archive)
|
33
|
+
FormatParser::Archive.new(nature: :archive, format: :zip, entries: entries_archive, content_type: ZIP_MIME_TYPE)
|
32
34
|
end
|
33
35
|
rescue FileReader::Error
|
34
36
|
# This is not a ZIP, or a broken ZIP.
|
data/lib/remote_io.rb
CHANGED
@@ -24,8 +24,11 @@ class FormatParser::RemoteIO
|
|
24
24
|
end
|
25
25
|
|
26
26
|
# @param uri[URI, String] the remote URL to obtain
|
27
|
-
|
27
|
+
# @param headers[Hash] (optional) the HTTP headers to be used in the HTTP request
|
28
|
+
def initialize(uri, headers: {})
|
28
29
|
require 'faraday'
|
30
|
+
require 'faraday_middleware/response/follow_redirects'
|
31
|
+
@headers = headers
|
29
32
|
@uri = uri
|
30
33
|
@pos = 0
|
31
34
|
@remote_size = false
|
@@ -78,21 +81,40 @@ class FormatParser::RemoteIO
|
|
78
81
|
# We use a GET and not a HEAD request followed by a GET because
|
79
82
|
# S3 does not allow HEAD requests if you only presigned your URL for GETs, so we
|
80
83
|
# combine the first GET of a segment and retrieving the size of the resource
|
81
|
-
|
84
|
+
conn = Faraday.new(headers: @headers) do |faraday|
|
85
|
+
faraday.use FaradayMiddleware::FollowRedirects
|
86
|
+
# we still need the default adapter, more details: https://blog.thecodewhisperer.com/permalink/losing-time-to-faraday
|
87
|
+
faraday.adapter Faraday.default_adapter
|
88
|
+
end
|
89
|
+
response = conn.get(@uri, nil, range: 'bytes=%d-%d' % [range.begin, range.end])
|
82
90
|
|
83
91
|
case response.status
|
84
|
-
when 200
|
92
|
+
when 200
|
93
|
+
# S3 returns 200 when you request a Range that is fully satisfied by the entire object,
|
94
|
+
# we take that into account here. Also, for very tiny responses (and also for empty responses)
|
95
|
+
# the responses are going to be 200 which does not mean we cannot proceed
|
96
|
+
# To have a good check for both of these conditions we need to know whether the ranges overlap fully
|
97
|
+
response_size = response.body.bytesize
|
98
|
+
requested_range_size = range.end - range.begin + 1
|
99
|
+
if response_size > requested_range_size
|
100
|
+
error_message = [
|
101
|
+
"We requested #{requested_range_size} bytes, but the server sent us more",
|
102
|
+
"(#{response_size} bytes) - it likely has no `Range:` support.",
|
103
|
+
"The error occurred when talking to #{@uri})"
|
104
|
+
]
|
105
|
+
raise InvalidRequest.new(response.status, error_message.join("\n"))
|
106
|
+
end
|
107
|
+
[response_size, response.body]
|
108
|
+
when 206
|
85
109
|
# Figure out if the server supports content ranges, if it doesn't we have no
|
86
110
|
# business working with that server
|
87
111
|
range_header = response.headers['Content-Range']
|
88
|
-
raise InvalidRequest.new(response.status, "
|
112
|
+
raise InvalidRequest.new(response.status, "The server replied with 206 status but no Content-Range at #{@uri}") unless range_header
|
89
113
|
|
90
114
|
# "Content-Range: bytes 0-0/307404381" is how the response header is structured
|
91
115
|
size = range_header[/\/(\d+)$/, 1].to_i
|
92
116
|
|
93
|
-
#
|
94
|
-
# we take that into account here. For other servers, 206 is the expected response code.
|
95
|
-
# Also, if we request a _larger_ range than what can be satisfied by the server,
|
117
|
+
# If we request a _larger_ range than what can be satisfied by the server,
|
96
118
|
# the response is going to only contain what _can_ be sent and the status is also going
|
97
119
|
# to be 206
|
98
120
|
return [size, response.body]
|
data/lib/text.rb
CHANGED
data/lib/video.rb
CHANGED
@@ -23,6 +23,9 @@ module FormatParser
|
|
23
23
|
# it can be placed here
|
24
24
|
attr_accessor :intrinsics
|
25
25
|
|
26
|
+
# The MIME type of the video
|
27
|
+
attr_accessor :content_type
|
28
|
+
|
26
29
|
# Only permits assignments via defined accessors
|
27
30
|
def initialize(**attributes)
|
28
31
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
@@ -10,6 +10,7 @@ describe FormatParser::AIFFParser do
|
|
10
10
|
expect(parse_result.num_audio_channels).to eq(2)
|
11
11
|
expect(parse_result.audio_sample_rate_hz).to be_within(0.01).of(44100)
|
12
12
|
expect(parse_result.media_duration_seconds).to be_within(0.01).of(1.05)
|
13
|
+
expect(parse_result.content_type).to eq('audio/x-aiff')
|
13
14
|
end
|
14
15
|
|
15
16
|
it 'parses a Logic Pro created AIFF sample file having a COMT chunk before a COMM chunk' do
|
@@ -13,6 +13,8 @@ describe FormatParser::BMPParser do
|
|
13
13
|
expect(parsed.width_px).to eq(40)
|
14
14
|
expect(parsed.height_px).to eq(27)
|
15
15
|
|
16
|
+
expect(parsed.content_type).to eq('image/bmp')
|
17
|
+
|
16
18
|
expect(parsed.intrinsics).not_to be_nil
|
17
19
|
expect(parsed.intrinsics[:vertical_resolution]).to eq(2834)
|
18
20
|
expect(parsed.intrinsics[:horizontal_resolution]).to eq(2834)
|
@@ -32,6 +34,8 @@ describe FormatParser::BMPParser do
|
|
32
34
|
expect(parsed.width_px).to eq(1920)
|
33
35
|
expect(parsed.height_px).to eq(1080)
|
34
36
|
|
37
|
+
expect(parsed.content_type).to eq('image/bmp')
|
38
|
+
|
35
39
|
expect(parsed.intrinsics).not_to be_nil
|
36
40
|
expect(parsed.intrinsics[:vertical_resolution]).to eq(2835)
|
37
41
|
expect(parsed.intrinsics[:horizontal_resolution]).to eq(2835)
|
@@ -51,6 +55,8 @@ describe FormatParser::BMPParser do
|
|
51
55
|
expect(parsed.width_px).to eq(200)
|
52
56
|
expect(parsed.height_px).to eq(200)
|
53
57
|
|
58
|
+
expect(parsed.content_type).to eq('image/bmp')
|
59
|
+
|
54
60
|
expect(parsed.intrinsics).not_to be_nil
|
55
61
|
end
|
56
62
|
|
@@ -64,6 +70,7 @@ describe FormatParser::BMPParser do
|
|
64
70
|
expect(parsed.color_mode).to eq(:rgb)
|
65
71
|
expect(parsed.width_px).to eq(40)
|
66
72
|
expect(parsed.height_px).to eq(27)
|
73
|
+
expect(parsed.content_type).to eq('image/bmp')
|
67
74
|
expect(parsed.intrinsics[:bits_per_pixel]).to eq(24)
|
68
75
|
expect(parsed.intrinsics[:data_order]).to eq(:normal)
|
69
76
|
|
@@ -76,6 +83,7 @@ describe FormatParser::BMPParser do
|
|
76
83
|
expect(parsed.color_mode).to eq(:rgb)
|
77
84
|
expect(parsed.width_px).to eq(40)
|
78
85
|
expect(parsed.height_px).to eq(27)
|
86
|
+
expect(parsed.content_type).to eq('image/bmp')
|
79
87
|
expect(parsed.intrinsics[:bits_per_pixel]).to eq(24)
|
80
88
|
expect(parsed.intrinsics[:data_order]).to eq(:normal)
|
81
89
|
end
|
@@ -14,6 +14,7 @@ describe FormatParser::FLACParser do
|
|
14
14
|
expect(parsed.intrinsics).not_to be_nil
|
15
15
|
expect(parsed.media_duration_frames).to eq(33810)
|
16
16
|
expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
|
17
|
+
expect(parsed.content_type).to eq('audio/x-flac')
|
17
18
|
end
|
18
19
|
|
19
20
|
it 'decodes and estimates duration for the 16bit FLAC File' do
|
@@ -37,7 +37,7 @@ describe FormatParser::MOOVParser do
|
|
37
37
|
expect(result.nature).to eq(:audio)
|
38
38
|
expect(result.media_duration_seconds).to be_kind_of(Float)
|
39
39
|
expect(result.media_duration_seconds).to be > 0
|
40
|
-
|
40
|
+
expect(result.content_type).to be_kind_of(String)
|
41
41
|
expect(result.intrinsics).not_to be_nil
|
42
42
|
end
|
43
43
|
end
|
@@ -52,6 +52,7 @@ describe FormatParser::MOOVParser do
|
|
52
52
|
expect(result.height_px).to be > 0
|
53
53
|
expect(result.media_duration_seconds).to be_kind_of(Float)
|
54
54
|
expect(result.media_duration_seconds).to be > 0
|
55
|
+
expect(result.content_type).to eq('video/mp4')
|
55
56
|
|
56
57
|
expect(result.intrinsics).not_to be_nil
|
57
58
|
end
|
@@ -67,6 +68,7 @@ describe FormatParser::MOOVParser do
|
|
67
68
|
expect(result.height_px).to be > 0
|
68
69
|
expect(result.media_duration_seconds).to be_kind_of(Float)
|
69
70
|
expect(result.media_duration_seconds).to be > 0
|
71
|
+
expect(result.content_type).to eq('video/mp4')
|
70
72
|
|
71
73
|
expect(result.intrinsics).not_to be_nil
|
72
74
|
end
|
@@ -79,6 +81,7 @@ describe FormatParser::MOOVParser do
|
|
79
81
|
expect(result).not_to be_nil
|
80
82
|
expect(result.nature).to eq(:audio)
|
81
83
|
expect(result.format).to eq(:m4a)
|
84
|
+
expect(result.content_type).to eq('audio/mp4')
|
82
85
|
end
|
83
86
|
|
84
87
|
it 'parses a MOV file and provides the necessary metadata' do
|
@@ -23,6 +23,7 @@ describe FormatParser::MP3Parser do
|
|
23
23
|
|
24
24
|
expect(parsed.nature).to eq(:audio)
|
25
25
|
expect(parsed.format).to eq(:mp3)
|
26
|
+
expect(parsed.content_type).to eq('audio/mpeg')
|
26
27
|
expect(parsed.num_audio_channels).to eq(2)
|
27
28
|
expect(parsed.audio_sample_rate_hz).to eq(48000)
|
28
29
|
expect(parsed.intrinsics).not_to be_nil
|
@@ -6,6 +6,7 @@ describe FormatParser::OggParser do
|
|
6
6
|
|
7
7
|
expect(parse_result.nature).to eq(:audio)
|
8
8
|
expect(parse_result.format).to eq(:ogg)
|
9
|
+
expect(parse_result.content_type).to eq('audio/ogg')
|
9
10
|
expect(parse_result.num_audio_channels).to eq(1)
|
10
11
|
expect(parse_result.audio_sample_rate_hz).to eq(16000)
|
11
12
|
expect(parse_result.media_duration_seconds).to be_within(0.01).of(2973.95)
|
@@ -59,6 +59,7 @@ describe FormatParser::TIFFParser do
|
|
59
59
|
expect(parsed.width_px).to eq(7952)
|
60
60
|
expect(parsed.height_px).to eq(5304)
|
61
61
|
expect(parsed.intrinsics[:exif]).not_to be_nil
|
62
|
+
expect(parsed.content_type).to eq('image/x-sony-arw')
|
62
63
|
end
|
63
64
|
|
64
65
|
describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
|
@@ -14,6 +14,7 @@ describe FormatParser::ZIPParser do
|
|
14
14
|
expect(result).not_to be_nil
|
15
15
|
|
16
16
|
expect(result.format).to eq(:zip)
|
17
|
+
expect(result.content_type).to eq('application/zip')
|
17
18
|
expect(result.nature).to eq(:archive)
|
18
19
|
expect(result.entries.length).to eq(0xFFFF + 1)
|
19
20
|
|
@@ -58,6 +59,7 @@ describe FormatParser::ZIPParser do
|
|
58
59
|
result = subject.call(fi_io)
|
59
60
|
expect(result.nature).to eq(:document)
|
60
61
|
expect(result.format).to eq(:docx)
|
62
|
+
expect(result.content_type).to eq('application/vnd.openxmlformats-officedocument.wordprocessingml.document')
|
61
63
|
|
62
64
|
fixture_path = fixtures_dir + '/ZIP/sample-docx.docx'
|
63
65
|
fi_io = File.open(fixture_path, 'rb')
|
@@ -15,18 +15,31 @@ describe 'Fetching data from HTTP remotes' do
|
|
15
15
|
}
|
16
16
|
@server = WEBrick::HTTPServer.new(options)
|
17
17
|
@server.mount '/', WEBrick::HTTPServlet::FileHandler, fixtures_dir
|
18
|
+
@server.mount_proc '/redirect' do |req, res|
|
19
|
+
res.status = 302
|
20
|
+
res.header['Location'] = req.path.sub('/redirect', '')
|
21
|
+
end
|
22
|
+
@server.mount_proc '/empty' do |_req, res|
|
23
|
+
res.status = 200
|
24
|
+
res.body = ''
|
25
|
+
end
|
26
|
+
@server.mount_proc '/tiny' do |_req, res|
|
27
|
+
res.status = 200
|
28
|
+
res.body = File.read(fixtures_dir + '/test.gif')
|
29
|
+
end
|
30
|
+
|
18
31
|
trap('INT') { @server.stop }
|
19
32
|
@server_thread = Thread.new { @server.start }
|
20
33
|
end
|
21
34
|
|
22
|
-
it '
|
35
|
+
it 'works with .parse_http called without any options' do
|
23
36
|
result = FormatParser.parse_http('http://localhost:9399/PNG/anim.png')
|
24
37
|
|
25
38
|
expect(result.format).to eq(:png)
|
26
39
|
expect(result.height_px).to eq(180)
|
27
40
|
end
|
28
41
|
|
29
|
-
it '
|
42
|
+
it 'works with .parse_http called with additional options' do
|
30
43
|
fake_result = double(nature: :audio, format: :aiff)
|
31
44
|
expect_any_instance_of(FormatParser::AIFFParser).to receive(:call).and_return(fake_result)
|
32
45
|
results = FormatParser.parse_http('http://localhost:9399/PNG/anim.png', results: :all)
|
@@ -35,6 +48,18 @@ describe 'Fetching data from HTTP remotes' do
|
|
35
48
|
expect(results).to include(fake_result)
|
36
49
|
end
|
37
50
|
|
51
|
+
it 'is able to cope with a 0-size resource which does not provide Content-Range' do
|
52
|
+
file_information = FormatParser.parse_http('http://localhost:9399/empty')
|
53
|
+
|
54
|
+
expect(file_information).to be_nil
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'is able to cope with a tiny resource which fits into the first requested range completely' do
|
58
|
+
file_information = FormatParser.parse_http('http://localhost:9399/tiny')
|
59
|
+
expect(file_information).not_to be_nil
|
60
|
+
expect(file_information.nature).to eq(:image)
|
61
|
+
end
|
62
|
+
|
38
63
|
it 'parses the animated PNG over HTTP' do
|
39
64
|
file_information = FormatParser.parse_http('http://localhost:9399/PNG/anim.png')
|
40
65
|
expect(file_information).not_to be_nil
|
@@ -91,6 +116,32 @@ describe 'Fetching data from HTTP remotes' do
|
|
91
116
|
end
|
92
117
|
end
|
93
118
|
|
119
|
+
context 'when the server responds with a redirect' do
|
120
|
+
it 'follows the redirect' do
|
121
|
+
file_information = FormatParser.parse_http('http://localhost:9399/redirect/TIFF/test.tif')
|
122
|
+
expect(file_information.format).to eq(:tif)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
it 'sends provided HTTP headers in the request' do
|
127
|
+
# Faraday is required only after calling .parse_http
|
128
|
+
# This line is just to trigger this require, then it's possible to
|
129
|
+
# add an expectation of how Faraday is initialized after.
|
130
|
+
FormatParser.parse_http('invalid_url') rescue nil
|
131
|
+
|
132
|
+
expect(Faraday)
|
133
|
+
.to receive(:new)
|
134
|
+
.with(headers: {'test-header' => 'test-value'})
|
135
|
+
.and_call_original
|
136
|
+
|
137
|
+
file_information = FormatParser.parse_http(
|
138
|
+
'http://localhost:9399//TIFF/test.tif',
|
139
|
+
headers: {'test-header' => 'test-value'}
|
140
|
+
)
|
141
|
+
|
142
|
+
expect(file_information.format).to eq(:tif)
|
143
|
+
end
|
144
|
+
|
94
145
|
after(:all) do
|
95
146
|
@server.stop
|
96
147
|
@server_thread.join(0.5)
|
data/spec/remote_io_spec.rb
CHANGED
@@ -7,7 +7,9 @@ describe FormatParser::RemoteIO do
|
|
7
7
|
rio = described_class.new('https://images.invalid/img.jpg')
|
8
8
|
|
9
9
|
fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 206, body: 'This is the response')
|
10
|
-
|
10
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
11
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
12
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
|
11
13
|
|
12
14
|
rio.seek(10)
|
13
15
|
read_result = rio.read(100)
|
@@ -18,7 +20,9 @@ describe FormatParser::RemoteIO do
|
|
18
20
|
rio = described_class.new('https://images.invalid/img.jpg')
|
19
21
|
|
20
22
|
fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 200, body: 'This is the response')
|
21
|
-
|
23
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
24
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
25
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
|
22
26
|
|
23
27
|
rio.seek(10)
|
24
28
|
read_result = rio.read(100)
|
@@ -29,7 +33,9 @@ describe FormatParser::RemoteIO do
|
|
29
33
|
rio = described_class.new('https://images.invalid/img.jpg')
|
30
34
|
|
31
35
|
fake_resp = double(headers: {}, status: 403, body: 'Please log in')
|
32
|
-
|
36
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
37
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
38
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
33
39
|
|
34
40
|
rio.seek(100)
|
35
41
|
expect { rio.read(100) }.to raise_error(/replied with a 403 and refused/)
|
@@ -39,7 +45,9 @@ describe FormatParser::RemoteIO do
|
|
39
45
|
rio = described_class.new('https://images.invalid/img.jpg')
|
40
46
|
|
41
47
|
fake_resp = double(headers: {}, status: 416, body: 'You stepped off the ledge of the range')
|
42
|
-
|
48
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
49
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
50
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
43
51
|
|
44
52
|
rio.seek(100)
|
45
53
|
expect(rio.read(100)).to be_nil
|
@@ -49,7 +57,9 @@ describe FormatParser::RemoteIO do
|
|
49
57
|
rio = described_class.new('https://images.invalid/img.jpg')
|
50
58
|
|
51
59
|
fake_resp = double(headers: {}, status: 403, body: 'Please log in')
|
52
|
-
|
60
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
61
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
62
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
53
63
|
|
54
64
|
rio.seek(100)
|
55
65
|
# rubocop: disable Lint/AmbiguousBlockAssociation
|
@@ -60,7 +70,9 @@ describe FormatParser::RemoteIO do
|
|
60
70
|
rio = described_class.new('https://images.invalid/img.jpg')
|
61
71
|
|
62
72
|
fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
63
|
-
|
73
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
74
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
75
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
64
76
|
|
65
77
|
rio.seek(100)
|
66
78
|
expect(rio.read(100)).to be_nil
|
@@ -69,15 +81,24 @@ describe FormatParser::RemoteIO do
|
|
69
81
|
it 'does not overwrite size when the range cannot be satisfied and the response is 416' do
|
70
82
|
rio = described_class.new('https://images.invalid/img.jpg')
|
71
83
|
|
72
|
-
|
73
|
-
|
84
|
+
fake_resp1 = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
|
85
|
+
fake_resp2 = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
86
|
+
|
87
|
+
faraday_conn = instance_double(Faraday::Connection)
|
88
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
89
|
+
expect(faraday_conn).to receive(:get)
|
90
|
+
.with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
|
91
|
+
.ordered
|
92
|
+
.and_return(fake_resp1)
|
93
|
+
expect(faraday_conn).to receive(:get)
|
94
|
+
.with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
95
|
+
.ordered
|
96
|
+
.and_return(fake_resp2)
|
97
|
+
|
74
98
|
rio.read(1)
|
75
99
|
|
76
100
|
expect(rio.size).to eq(13)
|
77
101
|
|
78
|
-
fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
79
|
-
expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
|
80
|
-
|
81
102
|
rio.seek(100)
|
82
103
|
expect(rio.read(100)).to be_nil
|
83
104
|
|
@@ -88,7 +109,9 @@ describe FormatParser::RemoteIO do
|
|
88
109
|
rio = described_class.new('https://images.invalid/img.jpg')
|
89
110
|
|
90
111
|
fake_resp = double(headers: {}, status: 502, body: 'Guru meditation')
|
91
|
-
|
112
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
113
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
114
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
92
115
|
|
93
116
|
rio.seek(100)
|
94
117
|
expect { rio.read(100) }.to raise_error(/replied with a 502 and we might want to retry/)
|
@@ -100,7 +123,9 @@ describe FormatParser::RemoteIO do
|
|
100
123
|
expect(rio.pos).to eq(0)
|
101
124
|
|
102
125
|
fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
|
103
|
-
|
126
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
127
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
128
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
|
104
129
|
rio.read(1)
|
105
130
|
|
106
131
|
expect(rio.pos).to eq(1)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.26.0
|
4
|
+
version: 0.29.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-09-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -73,6 +73,20 @@ dependencies:
|
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0.13'
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: faraday_middleware
|
78
|
+
requirement: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.14'
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.14'
|
76
90
|
- !ruby/object:Gem::Dependency
|
77
91
|
name: measurometer
|
78
92
|
requirement: !ruby/object:Gem::Requirement
|
@@ -183,10 +197,10 @@ executables:
|
|
183
197
|
extensions: []
|
184
198
|
extra_rdoc_files: []
|
185
199
|
files:
|
200
|
+
- ".github/workflows/main.yml"
|
186
201
|
- ".gitignore"
|
187
202
|
- ".rspec"
|
188
203
|
- ".rubocop.yml"
|
189
|
-
- ".travis.yml"
|
190
204
|
- CHANGELOG.md
|
191
205
|
- CODE_OF_CONDUCT.md
|
192
206
|
- CONTRIBUTING.md
|