format_parser 0.26.0 → 0.29.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +104 -0
- data/CHANGELOG.md +12 -0
- data/format_parser.gemspec +1 -0
- data/lib/archive.rb +3 -0
- data/lib/audio.rb +3 -0
- data/lib/document.rb +1 -0
- data/lib/format_parser/version.rb +1 -1
- data/lib/format_parser.rb +3 -2
- data/lib/image.rb +3 -0
- data/lib/parsers/aiff_parser.rb +4 -1
- data/lib/parsers/bmp_parser.rb +3 -0
- data/lib/parsers/cr2_parser.rb +2 -0
- data/lib/parsers/dpx_parser.rb +6 -0
- data/lib/parsers/flac_parser.rb +2 -0
- data/lib/parsers/gif_parser.rb +2 -0
- data/lib/parsers/jpeg_parser.rb +2 -0
- data/lib/parsers/m3u_parser.rb +3 -1
- data/lib/parsers/moov_parser.rb +10 -1
- data/lib/parsers/mp3_parser.rb +3 -2
- data/lib/parsers/ogg_parser.rb +3 -2
- data/lib/parsers/pdf_parser.rb +2 -2
- data/lib/parsers/png_parser.rb +2 -0
- data/lib/parsers/psd_parser.rb +2 -0
- data/lib/parsers/tiff_parser.rb +10 -2
- data/lib/parsers/wav_parser.rb +3 -0
- data/lib/parsers/zip_parser/office_formats.rb +5 -5
- data/lib/parsers/zip_parser.rb +5 -3
- data/lib/remote_io.rb +29 -7
- data/lib/text.rb +1 -0
- data/lib/video.rb +3 -0
- data/spec/parsers/aiff_parser_spec.rb +1 -0
- data/spec/parsers/bmp_parser_spec.rb +8 -0
- data/spec/parsers/cr2_parser_spec.rb +1 -0
- data/spec/parsers/dpx_parser_spec.rb +1 -0
- data/spec/parsers/flac_parser_spec.rb +1 -0
- data/spec/parsers/gif_parser_spec.rb +1 -0
- data/spec/parsers/jpeg_parser_spec.rb +1 -0
- data/spec/parsers/m3u_parser_spec.rb +1 -0
- data/spec/parsers/moov_parser_spec.rb +4 -1
- data/spec/parsers/mp3_parser_spec.rb +1 -0
- data/spec/parsers/ogg_parser_spec.rb +1 -0
- data/spec/parsers/pdf_parser_spec.rb +1 -0
- data/spec/parsers/png_parser_spec.rb +1 -0
- data/spec/parsers/psd_parser_spec.rb +1 -0
- data/spec/parsers/tiff_parser_spec.rb +1 -0
- data/spec/parsers/wav_parser_spec.rb +1 -0
- data/spec/parsers/zip_parser_spec.rb +2 -0
- data/spec/remote_fetching_spec.rb +53 -2
- data/spec/remote_io_spec.rb +38 -13
- metadata +17 -3
- data/.travis.yml +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75ee83f55840e3031d4d60d8dc07ca038812188613e2b740079e1c965efb2886
|
4
|
+
data.tar.gz: 31c3ee84434560c18e6ea74a23160b909e6f880f52b2ed6f0e888e847c557bd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 536cfb1bac7926f56ba760959d7c5a0905d3b2b0944b16248b6c81be00b722dad894a8d6d5773134fb0471e42d016a20be15e3d5a01c671f0cc65658f2fc05b4
|
7
|
+
data.tar.gz: cb3f73df051b8612cb6d0e1a4e55c045e461bf2c5e4667dd6e461e779b1f39d01be8d70d084e252e5198a966ac590129286811f5822808a0c980c2ad72a087a1
|
@@ -0,0 +1,104 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push,pull_request]
|
4
|
+
|
5
|
+
env:
|
6
|
+
BUNDLE_PATH: vendor/bundle
|
7
|
+
|
8
|
+
jobs:
|
9
|
+
lint:
|
10
|
+
name: Code Style
|
11
|
+
runs-on: ubuntu-18.04
|
12
|
+
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
|
13
|
+
strategy:
|
14
|
+
matrix:
|
15
|
+
ruby:
|
16
|
+
- 2.7
|
17
|
+
- 2.6
|
18
|
+
- 2.5
|
19
|
+
- 2.4
|
20
|
+
- 2.3
|
21
|
+
- 2.2
|
22
|
+
- jruby
|
23
|
+
steps:
|
24
|
+
- name: Checkout
|
25
|
+
uses: actions/checkout@v2
|
26
|
+
- name: Setup Ruby
|
27
|
+
uses: ruby/setup-ruby@v1
|
28
|
+
with:
|
29
|
+
ruby-version: ${{ matrix.ruby }}
|
30
|
+
- name: Gemfile Cache
|
31
|
+
uses: actions/cache@v2
|
32
|
+
with:
|
33
|
+
path: Gemfile.lock
|
34
|
+
key: ${{ runner.os }}-gemlock-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'format_parser.gemspec') }}
|
35
|
+
restore-keys: |
|
36
|
+
${{ runner.os }}-gemlock-${{ matrix.ruby }}-
|
37
|
+
- name: Bundle Cache
|
38
|
+
id: cache-gems
|
39
|
+
uses: actions/cache@v2
|
40
|
+
with:
|
41
|
+
path: vendor/bundle
|
42
|
+
key: ${{ runner.os }}-gems-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'Gemfile.lock', 'format_parser.gemspec') }}
|
43
|
+
restore-keys: |
|
44
|
+
${{ runner.os }}-gems-${{ matrix.ruby }}-
|
45
|
+
${{ runner.os }}-gems-
|
46
|
+
- name: Bundle Install
|
47
|
+
if: steps.cache-gems.outputs.cache-hit != 'true'
|
48
|
+
run: bundle install --jobs 4 --retry 3
|
49
|
+
- name: Rubocop Cache
|
50
|
+
uses: actions/cache@v2
|
51
|
+
with:
|
52
|
+
path: ~/.cache/rubocop_cache
|
53
|
+
key: ${{ runner.os }}-rubocop-${{ hashFiles('.rubocop.yml') }}
|
54
|
+
restore-keys: |
|
55
|
+
${{ runner.os }}-rubocop-
|
56
|
+
- name: Rubocop
|
57
|
+
run: bundle exec rubocop
|
58
|
+
test:
|
59
|
+
name: Specs
|
60
|
+
runs-on: ubuntu-18.04
|
61
|
+
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
|
62
|
+
strategy:
|
63
|
+
matrix:
|
64
|
+
ruby:
|
65
|
+
- 2.7
|
66
|
+
- 2.6
|
67
|
+
- 2.5
|
68
|
+
- 2.4
|
69
|
+
- 2.3
|
70
|
+
- 2.2
|
71
|
+
- jruby
|
72
|
+
experimental: [false]
|
73
|
+
include:
|
74
|
+
- ruby: 3.0
|
75
|
+
experimental: true
|
76
|
+
steps:
|
77
|
+
- name: Checkout
|
78
|
+
uses: actions/checkout@v2
|
79
|
+
- name: Setup Ruby
|
80
|
+
uses: ruby/setup-ruby@v1
|
81
|
+
with:
|
82
|
+
ruby-version: ${{ matrix.ruby }}
|
83
|
+
- name: Gemfile Cache
|
84
|
+
uses: actions/cache@v2
|
85
|
+
with:
|
86
|
+
path: Gemfile.lock
|
87
|
+
key: ${{ runner.os }}-gemlock-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'format_parser.gemspec') }}
|
88
|
+
restore-keys: |
|
89
|
+
${{ runner.os }}-gemlock-${{ matrix.ruby }}-
|
90
|
+
- name: Bundle Cache
|
91
|
+
id: cache-gems
|
92
|
+
uses: actions/cache@v2
|
93
|
+
with:
|
94
|
+
path: vendor/bundle
|
95
|
+
key: ${{ runner.os }}-gems-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'Gemfile.lock', 'format_parser.gemspec') }}
|
96
|
+
restore-keys: |
|
97
|
+
${{ runner.os }}-gems-${{ matrix.ruby }}-
|
98
|
+
${{ runner.os }}-gems-
|
99
|
+
- name: Bundle Install
|
100
|
+
if: steps.cache-gems.outputs.cache-hit != 'true'
|
101
|
+
run: bundle install --jobs 4 --retry 3
|
102
|
+
- name: RSpec
|
103
|
+
continue-on-error: ${{ matrix.experimental }}
|
104
|
+
run: bundle exec rake parallel:spec
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
## 0.29.1
|
2
|
+
* Fix handling of 200 responses with `parse_http` as well as handling of very small responses which do not need range access
|
3
|
+
|
4
|
+
## 0.29.0
|
5
|
+
* Add option `headers:` to `FormatParser.parse_http`
|
6
|
+
|
7
|
+
## 0.28.0
|
8
|
+
* Change `FormatParser.parse_http` to follow HTTP redirects
|
9
|
+
|
10
|
+
## 0.27.0
|
11
|
+
* Add `#content_type` to `Result` return values, giving a MIME type that makes sense for the detected filetype
|
12
|
+
|
1
13
|
## 0.26.0
|
2
14
|
* Add support for M3U format files
|
3
15
|
|
data/format_parser.gemspec
CHANGED
@@ -34,6 +34,7 @@ Gem::Specification.new do |spec|
|
|
34
34
|
spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
|
35
35
|
spec.add_dependency 'id3tag', '~> 0.14'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
|
+
spec.add_dependency 'faraday_middleware', '~> 0.14'
|
37
38
|
spec.add_dependency 'measurometer', '~> 1'
|
38
39
|
|
39
40
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
data/lib/archive.rb
CHANGED
@@ -26,6 +26,9 @@ module FormatParser
|
|
26
26
|
# it can be placed here
|
27
27
|
attr_accessor :intrinsics
|
28
28
|
|
29
|
+
# The MIME type of the archive
|
30
|
+
attr_accessor :content_type
|
31
|
+
|
29
32
|
# Only permits assignments via defined accessors
|
30
33
|
def initialize(**attributes)
|
31
34
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
data/lib/audio.rb
CHANGED
@@ -35,6 +35,9 @@ module FormatParser
|
|
35
35
|
# it can be placed here
|
36
36
|
attr_accessor :intrinsics
|
37
37
|
|
38
|
+
# The MIME type of the sound file
|
39
|
+
attr_accessor :content_type
|
40
|
+
|
38
41
|
# Only permits assignments via defined accessors
|
39
42
|
def initialize(**attributes)
|
40
43
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
data/lib/document.rb
CHANGED
data/lib/format_parser.rb
CHANGED
@@ -88,13 +88,14 @@ module FormatParser
|
|
88
88
|
# given to `.parse`. The accepted keyword arguments are the same as the ones for `parse`.
|
89
89
|
#
|
90
90
|
# @param url[String, URI] the HTTP(S) URL to request the object from using Faraday and `Range:` requests
|
91
|
+
# @param headers[Hash] (optional) the HTTP headers to send with the Faraday request for the object
|
91
92
|
# @param kwargs the keyword arguments to be delegated to `.parse`
|
92
93
|
# @see {.parse}
|
93
|
-
def self.parse_http(url, **kwargs)
|
94
|
+
def self.parse_http(url, headers: {}, **kwargs)
|
94
95
|
# Do not extract the filename, since the URL
|
95
96
|
# can really be "anything". But if the caller
|
96
97
|
# provides filename_hint it will be carried over
|
97
|
-
parse(RemoteIO.new(url), **kwargs)
|
98
|
+
parse(RemoteIO.new(url, headers: headers), **kwargs)
|
98
99
|
end
|
99
100
|
|
100
101
|
# Parses the file at the given `path` and returns the results as if it were any IO
|
data/lib/image.rb
CHANGED
@@ -64,6 +64,9 @@ module FormatParser
|
|
64
64
|
# it can be placed here
|
65
65
|
attr_accessor :intrinsics
|
66
66
|
|
67
|
+
# The MIME type of the image file
|
68
|
+
attr_accessor :content_type
|
69
|
+
|
67
70
|
# Only permits assignments via defined accessors
|
68
71
|
def initialize(**attributes)
|
69
72
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
data/lib/parsers/aiff_parser.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
class FormatParser::AIFFParser
|
2
2
|
include FormatParser::IOUtils
|
3
3
|
|
4
|
+
AIFF_MIME_TYPE = 'audio/x-aiff'
|
5
|
+
|
4
6
|
# Known chunk types we can omit when parsing,
|
5
7
|
# grossly lifted from http://www.muratnkonar.com/aiff/
|
6
8
|
KNOWN_CHUNKS = [
|
@@ -70,7 +72,8 @@ class FormatParser::AIFFParser
|
|
70
72
|
num_audio_channels: channels,
|
71
73
|
audio_sample_rate_hz: sample_rate.to_i,
|
72
74
|
media_duration_frames: sample_frames,
|
73
|
-
media_duration_seconds: duration_in_seconds
|
75
|
+
media_duration_seconds: duration_in_seconds,
|
76
|
+
content_type: AIFF_MIME_TYPE,
|
74
77
|
)
|
75
78
|
end
|
76
79
|
|
data/lib/parsers/bmp_parser.rb
CHANGED
@@ -5,6 +5,7 @@ class FormatParser::BMPParser
|
|
5
5
|
|
6
6
|
VALID_BMP = 'BM'
|
7
7
|
PERMISSIBLE_PIXEL_ARRAY_LOCATIONS = 26..512
|
8
|
+
BMP_MIME_TYPE = 'image/bmp'
|
8
9
|
|
9
10
|
def likely_match?(filename)
|
10
11
|
filename =~ /\.bmp$/i
|
@@ -42,6 +43,7 @@ class FormatParser::BMPParser
|
|
42
43
|
width_px: width,
|
43
44
|
height_px: height,
|
44
45
|
color_mode: :rgb,
|
46
|
+
content_type: BMP_MIME_TYPE,
|
45
47
|
intrinsics: {
|
46
48
|
data_order: data_order,
|
47
49
|
bits_per_pixel: bit_depth
|
@@ -63,6 +65,7 @@ class FormatParser::BMPParser
|
|
63
65
|
width_px: width,
|
64
66
|
height_px: height.abs,
|
65
67
|
color_mode: :rgb,
|
68
|
+
content_type: BMP_MIME_TYPE,
|
66
69
|
intrinsics: {
|
67
70
|
vertical_resolution: vertical_res,
|
68
71
|
horizontal_resolution: horizontal_res,
|
data/lib/parsers/cr2_parser.rb
CHANGED
@@ -6,6 +6,7 @@ class FormatParser::CR2Parser
|
|
6
6
|
|
7
7
|
TIFF_HEADER = [0x49, 0x49, 0x2a, 0x00]
|
8
8
|
CR2_HEADER = [0x43, 0x52, 0x02, 0x00]
|
9
|
+
CR2_MIME_TYPE = 'image/x-canon-cr2'
|
9
10
|
|
10
11
|
def likely_match?(filename)
|
11
12
|
filename =~ /\.cr2$/i
|
@@ -39,6 +40,7 @@ class FormatParser::CR2Parser
|
|
39
40
|
display_height_px: exif_data.rotated? ? w : h,
|
40
41
|
orientation: exif_data.orientation_sym,
|
41
42
|
intrinsics: {exif: exif_data},
|
43
|
+
content_type: CR2_MIME_TYPE,
|
42
44
|
)
|
43
45
|
rescue EXIFR::MalformedTIFF
|
44
46
|
nil
|
data/lib/parsers/dpx_parser.rb
CHANGED
@@ -6,6 +6,11 @@ class FormatParser::DPXParser
|
|
6
6
|
BE_MAGIC = 'SDPX'
|
7
7
|
LE_MAGIC = BE_MAGIC.reverse
|
8
8
|
|
9
|
+
# There is no official MIME type for DPX, so we have
|
10
|
+
# to invent something useful. We will prefix it with x-
|
11
|
+
# to indicate that it is a vendor subtype
|
12
|
+
DPX_MIME_TYPE = 'image/x-dpx'
|
13
|
+
|
9
14
|
class ByteOrderHintIO < SimpleDelegator
|
10
15
|
def initialize(io, is_little_endian)
|
11
16
|
super(io)
|
@@ -61,6 +66,7 @@ class FormatParser::DPXParser
|
|
61
66
|
display_width_px: display_w,
|
62
67
|
display_height_px: display_h,
|
63
68
|
intrinsics: dpx_structure,
|
69
|
+
content_type: DPX_MIME_TYPE,
|
64
70
|
)
|
65
71
|
end
|
66
72
|
|
data/lib/parsers/flac_parser.rb
CHANGED
@@ -4,6 +4,7 @@ class FormatParser::FLACParser
|
|
4
4
|
MAGIC_BYTES = 4
|
5
5
|
MAGIC_BYTE_STRING = 'fLaC'
|
6
6
|
BLOCK_HEADER_BYTES = 4
|
7
|
+
FLAC_MIME_TYPE = 'audio/x-flac'
|
7
8
|
|
8
9
|
def likely_match?(filename)
|
9
10
|
filename =~ /\.flac$/i
|
@@ -61,6 +62,7 @@ class FormatParser::FLACParser
|
|
61
62
|
audio_sample_rate_hz: sample_rate,
|
62
63
|
media_duration_seconds: duration,
|
63
64
|
media_duration_frames: total_samples,
|
65
|
+
content_type: FLAC_MIME_TYPE,
|
64
66
|
intrinsics: {
|
65
67
|
bits_per_sample: bits_per_sample,
|
66
68
|
minimum_frame_size: minimum_frame_size,
|
data/lib/parsers/gif_parser.rb
CHANGED
@@ -3,6 +3,7 @@ class FormatParser::GIFParser
|
|
3
3
|
|
4
4
|
HEADERS = ['GIF87a', 'GIF89a'].map(&:b)
|
5
5
|
NETSCAPE_AND_AUTHENTICATION_CODE = 'NETSCAPE2.0'
|
6
|
+
GIF_MIME_TYPE = 'image/gif'
|
6
7
|
|
7
8
|
def likely_match?(filename)
|
8
9
|
filename =~ /\.gif$/i
|
@@ -45,6 +46,7 @@ class FormatParser::GIFParser
|
|
45
46
|
height_px: h,
|
46
47
|
has_multiple_frames: is_animated,
|
47
48
|
color_mode: :indexed,
|
49
|
+
content_type: GIF_MIME_TYPE
|
48
50
|
)
|
49
51
|
end
|
50
52
|
|
data/lib/parsers/jpeg_parser.rb
CHANGED
@@ -12,6 +12,7 @@ class FormatParser::JPEGParser
|
|
12
12
|
APP1_MARKER = 0xE1 # maybe EXIF
|
13
13
|
EXIF_MAGIC_STRING = "Exif\0\0".b
|
14
14
|
MUST_FIND_NEXT_MARKER_WITHIN_BYTES = 1024
|
15
|
+
JPEG_MIME_TYPE = 'image/jpeg'
|
15
16
|
|
16
17
|
def self.likely_match?(filename)
|
17
18
|
filename =~ /\.jpe?g$/i
|
@@ -88,6 +89,7 @@ class FormatParser::JPEGParser
|
|
88
89
|
display_height_px: dh,
|
89
90
|
orientation: flat_exif.orientation_sym,
|
90
91
|
intrinsics: {exif: flat_exif},
|
92
|
+
content_type: JPEG_MIME_TYPE
|
91
93
|
)
|
92
94
|
|
93
95
|
return result
|
data/lib/parsers/m3u_parser.rb
CHANGED
@@ -2,6 +2,7 @@ class FormatParser::M3UParser
|
|
2
2
|
include FormatParser::IOUtils
|
3
3
|
|
4
4
|
HEADER = '#EXTM3U'
|
5
|
+
M3U8_MIME_TYPE = 'application/vnd.apple.mpegurl' # https://en.wikipedia.org/wiki/M3U#Internet_media_types
|
5
6
|
|
6
7
|
def likely_match?(filename)
|
7
8
|
filename =~ /\.m3u8?$/i
|
@@ -14,7 +15,8 @@ class FormatParser::M3UParser
|
|
14
15
|
return unless HEADER.eql?(header)
|
15
16
|
|
16
17
|
FormatParser::Text.new(
|
17
|
-
format: :m3u
|
18
|
+
format: :m3u,
|
19
|
+
content_type: M3U8_MIME_TYPE,
|
18
20
|
)
|
19
21
|
end
|
20
22
|
FormatParser.register_parser new, natures: :text, formats: :m3u
|
data/lib/parsers/moov_parser.rb
CHANGED
@@ -11,6 +11,12 @@ class FormatParser::MOOVParser
|
|
11
11
|
'm4a ' => :m4a,
|
12
12
|
}
|
13
13
|
|
14
|
+
# https://tools.ietf.org/html/rfc4337#section-2
|
15
|
+
# There is also video/quicktime which we should be able to capture
|
16
|
+
# here, but there is currently no detection for MOVs versus MP4s
|
17
|
+
MP4_AU_MIME_TYPE = 'audio/mp4'
|
18
|
+
MP4_MIXED_MIME_TYPE = 'video/mp4'
|
19
|
+
|
14
20
|
def likely_match?(filename)
|
15
21
|
filename =~ /\.(mov|m4a|ma4|mp4|aac|m4v)$/i
|
16
22
|
end
|
@@ -49,10 +55,12 @@ class FormatParser::MOOVParser
|
|
49
55
|
end
|
50
56
|
|
51
57
|
# M4A only contains audio, while MP4 and friends can contain video.
|
52
|
-
|
58
|
+
fmt = format_from_moov_type(file_type)
|
59
|
+
if fmt == :m4a
|
53
60
|
FormatParser::Audio.new(
|
54
61
|
format: format_from_moov_type(file_type),
|
55
62
|
media_duration_seconds: media_duration_s,
|
63
|
+
content_type: MP4_AU_MIME_TYPE,
|
56
64
|
intrinsics: atom_tree,
|
57
65
|
)
|
58
66
|
else
|
@@ -61,6 +69,7 @@ class FormatParser::MOOVParser
|
|
61
69
|
width_px: width,
|
62
70
|
height_px: height,
|
63
71
|
media_duration_seconds: media_duration_s,
|
72
|
+
content_type: MP4_MIXED_MIME_TYPE,
|
64
73
|
intrinsics: atom_tree,
|
65
74
|
)
|
66
75
|
end
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -32,7 +32,7 @@ class FormatParser::MP3Parser
|
|
32
32
|
MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
|
33
33
|
MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
|
34
34
|
TIFF_HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
|
35
|
-
|
35
|
+
MP3_MIME_TYPE = 'audio/mpeg'
|
36
36
|
# Wraps the Tag object returned by ID3Tag in such
|
37
37
|
# a way that a usable JSON representation gets
|
38
38
|
# returned
|
@@ -104,7 +104,8 @@ class FormatParser::MP3Parser
|
|
104
104
|
# do not tell anything of substance
|
105
105
|
num_audio_channels: first_frame.channels,
|
106
106
|
audio_sample_rate_hz: first_frame.sample_rate,
|
107
|
-
intrinsics: id3tags_hash.merge(id3tags: tags)
|
107
|
+
intrinsics: id3tags_hash.merge(id3tags: tags),
|
108
|
+
content_type: MP3_MIME_TYPE,
|
108
109
|
)
|
109
110
|
|
110
111
|
extra_file_attirbutes = fetch_extra_attributes_from_id3_tags(id3tags_hash)
|
data/lib/parsers/ogg_parser.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
class FormatParser::OggParser
|
4
4
|
include FormatParser::IOUtils
|
5
5
|
|
6
|
-
# Maximum size of an Ogg page
|
7
6
|
MAX_POSSIBLE_PAGE_SIZE = 65307
|
7
|
+
OGG_MIME_TYPE = 'audio/ogg'
|
8
8
|
|
9
9
|
def likely_match?(filename)
|
10
10
|
filename =~ /\.ogg$/i
|
@@ -45,7 +45,8 @@ class FormatParser::OggParser
|
|
45
45
|
format: :ogg,
|
46
46
|
audio_sample_rate_hz: sample_rate,
|
47
47
|
num_audio_channels: channels,
|
48
|
-
media_duration_seconds: duration
|
48
|
+
media_duration_seconds: duration,
|
49
|
+
content_type: OGG_MIME_TYPE,
|
49
50
|
)
|
50
51
|
end
|
51
52
|
|
data/lib/parsers/pdf_parser.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
class FormatParser::PDFParser
|
2
2
|
include FormatParser::IOUtils
|
3
|
-
|
4
3
|
# First 9 bytes of a PDF should be in this format, according to:
|
5
4
|
#
|
6
5
|
# https://stackoverflow.com/questions/3108201/detect-if-pdf-file-is-correct-header-pdf
|
@@ -8,6 +7,7 @@ class FormatParser::PDFParser
|
|
8
7
|
# There are however exceptions, which are left out for now.
|
9
8
|
#
|
10
9
|
PDF_MARKER = /%PDF-1\.[0-8]{1}/
|
10
|
+
PDF_CONTENT_TYPE = 'application/pdf'
|
11
11
|
|
12
12
|
def likely_match?(filename)
|
13
13
|
filename =~ /\.(pdf|ai)$/i
|
@@ -18,7 +18,7 @@ class FormatParser::PDFParser
|
|
18
18
|
|
19
19
|
return unless safe_read(io, 9) =~ PDF_MARKER
|
20
20
|
|
21
|
-
FormatParser::Document.new(format: :pdf)
|
21
|
+
FormatParser::Document.new(format: :pdf, content_type: PDF_CONTENT_TYPE)
|
22
22
|
end
|
23
23
|
|
24
24
|
FormatParser.register_parser new, natures: :document, formats: :pdf, priority: 1
|
data/lib/parsers/png_parser.rb
CHANGED
@@ -14,6 +14,7 @@ class FormatParser::PNGParser
|
|
14
14
|
4 => true, # Grayscale with alpha
|
15
15
|
6 => true,
|
16
16
|
}
|
17
|
+
PNG_MIME_TYPE = 'image/png'
|
17
18
|
|
18
19
|
def likely_match?(filename)
|
19
20
|
filename =~ /\.png$/i
|
@@ -67,6 +68,7 @@ class FormatParser::PNGParser
|
|
67
68
|
color_mode: color_mode,
|
68
69
|
has_multiple_frames: has_animation,
|
69
70
|
num_animation_or_video_frames: num_frames,
|
71
|
+
content_type: PNG_MIME_TYPE,
|
70
72
|
)
|
71
73
|
end
|
72
74
|
|
data/lib/parsers/psd_parser.rb
CHANGED
@@ -2,6 +2,7 @@ class FormatParser::PSDParser
|
|
2
2
|
include FormatParser::IOUtils
|
3
3
|
|
4
4
|
PSD_HEADER = [0x38, 0x42, 0x50, 0x53]
|
5
|
+
PSD_MIME_TYPE = 'application/x-photoshop'
|
5
6
|
|
6
7
|
def likely_match?(filename)
|
7
8
|
filename =~ /\.psd$/i # Maybe also PSB at some point
|
@@ -20,6 +21,7 @@ class FormatParser::PSDParser
|
|
20
21
|
format: :psd,
|
21
22
|
width_px: w,
|
22
23
|
height_px: h,
|
24
|
+
content_type: PSD_MIME_TYPE,
|
23
25
|
)
|
24
26
|
end
|
25
27
|
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -5,6 +5,8 @@ class FormatParser::TIFFParser
|
|
5
5
|
MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
|
6
6
|
MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
|
7
7
|
HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
|
8
|
+
TIFF_MIME_TYPE = 'image/tiff'
|
9
|
+
ARW_MIME_TYPE = 'image/x-sony-arw'
|
8
10
|
|
9
11
|
def likely_match?(filename)
|
10
12
|
filename =~ /\.tiff?$/i
|
@@ -14,7 +16,10 @@ class FormatParser::TIFFParser
|
|
14
16
|
io = FormatParser::IOConstraint.new(io)
|
15
17
|
|
16
18
|
return unless HEADER_BYTES.include?(safe_read(io, 4))
|
17
|
-
|
19
|
+
|
20
|
+
# Skip over the offset of the IFD,
|
21
|
+
# EXIFR will re-read it anyway
|
22
|
+
io.seek(io.pos + 2)
|
18
23
|
return if cr2?(io)
|
19
24
|
|
20
25
|
# The TIFF scanner in EXIFR is plenty good enough,
|
@@ -26,14 +31,17 @@ class FormatParser::TIFFParser
|
|
26
31
|
w = exif_data.width || exif_data.pixel_x_dimension
|
27
32
|
h = exif_data.height || exif_data.pixel_y_dimension
|
28
33
|
|
34
|
+
format = arw?(exif_data) ? :arw : :tif
|
35
|
+
mime_type = arw?(exif_data) ? ARW_MIME_TYPE : TIFF_MIME_TYPE
|
29
36
|
FormatParser::Image.new(
|
30
|
-
format:
|
37
|
+
format: format,
|
31
38
|
width_px: w,
|
32
39
|
height_px: h,
|
33
40
|
display_width_px: exif_data.rotated? ? h : w,
|
34
41
|
display_height_px: exif_data.rotated? ? w : h,
|
35
42
|
orientation: exif_data.orientation_sym,
|
36
43
|
intrinsics: {exif: exif_data},
|
44
|
+
content_type: mime_type,
|
37
45
|
)
|
38
46
|
rescue EXIFR::MalformedTIFF
|
39
47
|
nil
|
data/lib/parsers/wav_parser.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
class FormatParser::WAVParser
|
2
2
|
include FormatParser::IOUtils
|
3
3
|
|
4
|
+
WAV_MIME_TYPE = 'audio/x-wav'
|
5
|
+
|
4
6
|
def likely_match?(filename)
|
5
7
|
filename =~ /\.wav$/i
|
6
8
|
end
|
@@ -96,6 +98,7 @@ class FormatParser::WAVParser
|
|
96
98
|
audio_sample_rate_hz: fmt_data[:sample_rate],
|
97
99
|
media_duration_frames: sample_frames,
|
98
100
|
media_duration_seconds: duration_in_seconds,
|
101
|
+
content_type: WAV_MIME_TYPE,
|
99
102
|
)
|
100
103
|
end
|
101
104
|
|
@@ -37,15 +37,15 @@ module FormatParser::ZIPParser::OfficeFormats
|
|
37
37
|
OFFICE_MARKER_FILES.subset?(filenames_set)
|
38
38
|
end
|
39
39
|
|
40
|
-
def
|
40
|
+
def office_file_format_and_mime_type_from_entry_set(filenames_set)
|
41
41
|
if filenames_set.include?('word/document.xml')
|
42
|
-
:docx
|
42
|
+
[:docx, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document']
|
43
43
|
elsif filenames_set.include?('xl/workbook.xml')
|
44
|
-
:xlsx
|
44
|
+
[:xlsx, 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet']
|
45
45
|
elsif filenames_set.include?('ppt/presentation.xml')
|
46
|
-
:pptx
|
46
|
+
[:pptx, 'application/vnd.openxmlformats-officedocument.presentationml.presentation']
|
47
47
|
else
|
48
|
-
:unknown
|
48
|
+
[:unknown, 'application/zip']
|
49
49
|
end
|
50
50
|
end
|
51
51
|
end
|
data/lib/parsers/zip_parser.rb
CHANGED
@@ -5,6 +5,8 @@ class FormatParser::ZIPParser
|
|
5
5
|
include OfficeFormats
|
6
6
|
include FormatParser::IOUtils
|
7
7
|
|
8
|
+
ZIP_MIME_TYPE = 'application/zip'
|
9
|
+
|
8
10
|
def likely_match?(filename)
|
9
11
|
filename =~ /\.(zip|docx|keynote|numbers|pptx|xlsx)$/i
|
10
12
|
end
|
@@ -25,10 +27,10 @@ class FormatParser::ZIPParser
|
|
25
27
|
end
|
26
28
|
|
27
29
|
if office_document?(filenames_set)
|
28
|
-
office_format =
|
29
|
-
FormatParser::Archive.new(nature: :document, format: office_format, entries: entries_archive)
|
30
|
+
office_format, mime_type = office_file_format_and_mime_type_from_entry_set(filenames_set)
|
31
|
+
FormatParser::Archive.new(nature: :document, format: office_format, entries: entries_archive, content_type: mime_type)
|
30
32
|
else
|
31
|
-
FormatParser::Archive.new(nature: :archive, format: :zip, entries: entries_archive)
|
33
|
+
FormatParser::Archive.new(nature: :archive, format: :zip, entries: entries_archive, content_type: ZIP_MIME_TYPE)
|
32
34
|
end
|
33
35
|
rescue FileReader::Error
|
34
36
|
# This is not a ZIP, or a broken ZIP.
|
data/lib/remote_io.rb
CHANGED
@@ -24,8 +24,11 @@ class FormatParser::RemoteIO
|
|
24
24
|
end
|
25
25
|
|
26
26
|
# @param uri[URI, String] the remote URL to obtain
|
27
|
-
|
27
|
+
# @param headers[Hash] (optional) the HTTP headers to be used in the HTTP request
|
28
|
+
def initialize(uri, headers: {})
|
28
29
|
require 'faraday'
|
30
|
+
require 'faraday_middleware/response/follow_redirects'
|
31
|
+
@headers = headers
|
29
32
|
@uri = uri
|
30
33
|
@pos = 0
|
31
34
|
@remote_size = false
|
@@ -78,21 +81,40 @@ class FormatParser::RemoteIO
|
|
78
81
|
# We use a GET and not a HEAD request followed by a GET because
|
79
82
|
# S3 does not allow HEAD requests if you only presigned your URL for GETs, so we
|
80
83
|
# combine the first GET of a segment and retrieving the size of the resource
|
81
|
-
|
84
|
+
conn = Faraday.new(headers: @headers) do |faraday|
|
85
|
+
faraday.use FaradayMiddleware::FollowRedirects
|
86
|
+
# we still need the default adapter, more details: https://blog.thecodewhisperer.com/permalink/losing-time-to-faraday
|
87
|
+
faraday.adapter Faraday.default_adapter
|
88
|
+
end
|
89
|
+
response = conn.get(@uri, nil, range: 'bytes=%d-%d' % [range.begin, range.end])
|
82
90
|
|
83
91
|
case response.status
|
84
|
-
when 200
|
92
|
+
when 200
|
93
|
+
# S3 returns 200 when you request a Range that is fully satisfied by the entire object,
|
94
|
+
# we take that into account here. Also, for very tiny responses (and also for empty responses)
|
95
|
+
# the responses are going to be 200 which does not mean we cannot proceed
|
96
|
+
# To have a good check for both of these conditions we need to know whether the ranges overlap fully
|
97
|
+
response_size = response.body.bytesize
|
98
|
+
requested_range_size = range.end - range.begin + 1
|
99
|
+
if response_size > requested_range_size
|
100
|
+
error_message = [
|
101
|
+
"We requested #{requested_range_size} bytes, but the server sent us more",
|
102
|
+
"(#{response_size} bytes) - it likely has no `Range:` support.",
|
103
|
+
"The error occurred when talking to #{@uri})"
|
104
|
+
]
|
105
|
+
raise InvalidRequest.new(response.status, error_message.join("\n"))
|
106
|
+
end
|
107
|
+
[response_size, response.body]
|
108
|
+
when 206
|
85
109
|
# Figure out if the server supports content ranges, if it doesn't we have no
|
86
110
|
# business working with that server
|
87
111
|
range_header = response.headers['Content-Range']
|
88
|
-
raise InvalidRequest.new(response.status, "
|
112
|
+
raise InvalidRequest.new(response.status, "The server replied with 206 status but no Content-Range at #{@uri}") unless range_header
|
89
113
|
|
90
114
|
# "Content-Range: bytes 0-0/307404381" is how the response header is structured
|
91
115
|
size = range_header[/\/(\d+)$/, 1].to_i
|
92
116
|
|
93
|
-
#
|
94
|
-
# we take that into account here. For other servers, 206 is the expected response code.
|
95
|
-
# Also, if we request a _larger_ range than what can be satisfied by the server,
|
117
|
+
# If we request a _larger_ range than what can be satisfied by the server,
|
96
118
|
# the response is going to only contain what _can_ be sent and the status is also going
|
97
119
|
# to be 206
|
98
120
|
return [size, response.body]
|
data/lib/text.rb
CHANGED
data/lib/video.rb
CHANGED
@@ -23,6 +23,9 @@ module FormatParser
|
|
23
23
|
# it can be placed here
|
24
24
|
attr_accessor :intrinsics
|
25
25
|
|
26
|
+
# The MIME type of the video
|
27
|
+
attr_accessor :content_type
|
28
|
+
|
26
29
|
# Only permits assignments via defined accessors
|
27
30
|
def initialize(**attributes)
|
28
31
|
attributes.map { |(k, v)| public_send("#{k}=", v) }
|
@@ -10,6 +10,7 @@ describe FormatParser::AIFFParser do
|
|
10
10
|
expect(parse_result.num_audio_channels).to eq(2)
|
11
11
|
expect(parse_result.audio_sample_rate_hz).to be_within(0.01).of(44100)
|
12
12
|
expect(parse_result.media_duration_seconds).to be_within(0.01).of(1.05)
|
13
|
+
expect(parse_result.content_type).to eq('audio/x-aiff')
|
13
14
|
end
|
14
15
|
|
15
16
|
it 'parses a Logic Pro created AIFF sample file having a COMT chunk before a COMM chunk' do
|
@@ -13,6 +13,8 @@ describe FormatParser::BMPParser do
|
|
13
13
|
expect(parsed.width_px).to eq(40)
|
14
14
|
expect(parsed.height_px).to eq(27)
|
15
15
|
|
16
|
+
expect(parsed.content_type).to eq('image/bmp')
|
17
|
+
|
16
18
|
expect(parsed.intrinsics).not_to be_nil
|
17
19
|
expect(parsed.intrinsics[:vertical_resolution]).to eq(2834)
|
18
20
|
expect(parsed.intrinsics[:horizontal_resolution]).to eq(2834)
|
@@ -32,6 +34,8 @@ describe FormatParser::BMPParser do
|
|
32
34
|
expect(parsed.width_px).to eq(1920)
|
33
35
|
expect(parsed.height_px).to eq(1080)
|
34
36
|
|
37
|
+
expect(parsed.content_type).to eq('image/bmp')
|
38
|
+
|
35
39
|
expect(parsed.intrinsics).not_to be_nil
|
36
40
|
expect(parsed.intrinsics[:vertical_resolution]).to eq(2835)
|
37
41
|
expect(parsed.intrinsics[:horizontal_resolution]).to eq(2835)
|
@@ -51,6 +55,8 @@ describe FormatParser::BMPParser do
|
|
51
55
|
expect(parsed.width_px).to eq(200)
|
52
56
|
expect(parsed.height_px).to eq(200)
|
53
57
|
|
58
|
+
expect(parsed.content_type).to eq('image/bmp')
|
59
|
+
|
54
60
|
expect(parsed.intrinsics).not_to be_nil
|
55
61
|
end
|
56
62
|
|
@@ -64,6 +70,7 @@ describe FormatParser::BMPParser do
|
|
64
70
|
expect(parsed.color_mode).to eq(:rgb)
|
65
71
|
expect(parsed.width_px).to eq(40)
|
66
72
|
expect(parsed.height_px).to eq(27)
|
73
|
+
expect(parsed.content_type).to eq('image/bmp')
|
67
74
|
expect(parsed.intrinsics[:bits_per_pixel]).to eq(24)
|
68
75
|
expect(parsed.intrinsics[:data_order]).to eq(:normal)
|
69
76
|
|
@@ -76,6 +83,7 @@ describe FormatParser::BMPParser do
|
|
76
83
|
expect(parsed.color_mode).to eq(:rgb)
|
77
84
|
expect(parsed.width_px).to eq(40)
|
78
85
|
expect(parsed.height_px).to eq(27)
|
86
|
+
expect(parsed.content_type).to eq('image/bmp')
|
79
87
|
expect(parsed.intrinsics[:bits_per_pixel]).to eq(24)
|
80
88
|
expect(parsed.intrinsics[:data_order]).to eq(:normal)
|
81
89
|
end
|
@@ -14,6 +14,7 @@ describe FormatParser::FLACParser do
|
|
14
14
|
expect(parsed.intrinsics).not_to be_nil
|
15
15
|
expect(parsed.media_duration_frames).to eq(33810)
|
16
16
|
expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
|
17
|
+
expect(parsed.content_type).to eq('audio/x-flac')
|
17
18
|
end
|
18
19
|
|
19
20
|
it 'decodes and estimates duration for the 16bit FLAC File' do
|
@@ -37,7 +37,7 @@ describe FormatParser::MOOVParser do
|
|
37
37
|
expect(result.nature).to eq(:audio)
|
38
38
|
expect(result.media_duration_seconds).to be_kind_of(Float)
|
39
39
|
expect(result.media_duration_seconds).to be > 0
|
40
|
-
|
40
|
+
expect(result.content_type).to be_kind_of(String)
|
41
41
|
expect(result.intrinsics).not_to be_nil
|
42
42
|
end
|
43
43
|
end
|
@@ -52,6 +52,7 @@ describe FormatParser::MOOVParser do
|
|
52
52
|
expect(result.height_px).to be > 0
|
53
53
|
expect(result.media_duration_seconds).to be_kind_of(Float)
|
54
54
|
expect(result.media_duration_seconds).to be > 0
|
55
|
+
expect(result.content_type).to eq('video/mp4')
|
55
56
|
|
56
57
|
expect(result.intrinsics).not_to be_nil
|
57
58
|
end
|
@@ -67,6 +68,7 @@ describe FormatParser::MOOVParser do
|
|
67
68
|
expect(result.height_px).to be > 0
|
68
69
|
expect(result.media_duration_seconds).to be_kind_of(Float)
|
69
70
|
expect(result.media_duration_seconds).to be > 0
|
71
|
+
expect(result.content_type).to eq('video/mp4')
|
70
72
|
|
71
73
|
expect(result.intrinsics).not_to be_nil
|
72
74
|
end
|
@@ -79,6 +81,7 @@ describe FormatParser::MOOVParser do
|
|
79
81
|
expect(result).not_to be_nil
|
80
82
|
expect(result.nature).to eq(:audio)
|
81
83
|
expect(result.format).to eq(:m4a)
|
84
|
+
expect(result.content_type).to eq('audio/mp4')
|
82
85
|
end
|
83
86
|
|
84
87
|
it 'parses a MOV file and provides the necessary metadata' do
|
@@ -23,6 +23,7 @@ describe FormatParser::MP3Parser do
|
|
23
23
|
|
24
24
|
expect(parsed.nature).to eq(:audio)
|
25
25
|
expect(parsed.format).to eq(:mp3)
|
26
|
+
expect(parsed.content_type).to eq('audio/mpeg')
|
26
27
|
expect(parsed.num_audio_channels).to eq(2)
|
27
28
|
expect(parsed.audio_sample_rate_hz).to eq(48000)
|
28
29
|
expect(parsed.intrinsics).not_to be_nil
|
@@ -6,6 +6,7 @@ describe FormatParser::OggParser do
|
|
6
6
|
|
7
7
|
expect(parse_result.nature).to eq(:audio)
|
8
8
|
expect(parse_result.format).to eq(:ogg)
|
9
|
+
expect(parse_result.content_type).to eq('audio/ogg')
|
9
10
|
expect(parse_result.num_audio_channels).to eq(1)
|
10
11
|
expect(parse_result.audio_sample_rate_hz).to eq(16000)
|
11
12
|
expect(parse_result.media_duration_seconds).to be_within(0.01).of(2973.95)
|
@@ -59,6 +59,7 @@ describe FormatParser::TIFFParser do
|
|
59
59
|
expect(parsed.width_px).to eq(7952)
|
60
60
|
expect(parsed.height_px).to eq(5304)
|
61
61
|
expect(parsed.intrinsics[:exif]).not_to be_nil
|
62
|
+
expect(parsed.content_type).to eq('image/x-sony-arw')
|
62
63
|
end
|
63
64
|
|
64
65
|
describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
|
@@ -14,6 +14,7 @@ describe FormatParser::ZIPParser do
|
|
14
14
|
expect(result).not_to be_nil
|
15
15
|
|
16
16
|
expect(result.format).to eq(:zip)
|
17
|
+
expect(result.content_type).to eq('application/zip')
|
17
18
|
expect(result.nature).to eq(:archive)
|
18
19
|
expect(result.entries.length).to eq(0xFFFF + 1)
|
19
20
|
|
@@ -58,6 +59,7 @@ describe FormatParser::ZIPParser do
|
|
58
59
|
result = subject.call(fi_io)
|
59
60
|
expect(result.nature).to eq(:document)
|
60
61
|
expect(result.format).to eq(:docx)
|
62
|
+
expect(result.content_type).to eq('application/vnd.openxmlformats-officedocument.wordprocessingml.document')
|
61
63
|
|
62
64
|
fixture_path = fixtures_dir + '/ZIP/sample-docx.docx'
|
63
65
|
fi_io = File.open(fixture_path, 'rb')
|
@@ -15,18 +15,31 @@ describe 'Fetching data from HTTP remotes' do
|
|
15
15
|
}
|
16
16
|
@server = WEBrick::HTTPServer.new(options)
|
17
17
|
@server.mount '/', WEBrick::HTTPServlet::FileHandler, fixtures_dir
|
18
|
+
@server.mount_proc '/redirect' do |req, res|
|
19
|
+
res.status = 302
|
20
|
+
res.header['Location'] = req.path.sub('/redirect', '')
|
21
|
+
end
|
22
|
+
@server.mount_proc '/empty' do |_req, res|
|
23
|
+
res.status = 200
|
24
|
+
res.body = ''
|
25
|
+
end
|
26
|
+
@server.mount_proc '/tiny' do |_req, res|
|
27
|
+
res.status = 200
|
28
|
+
res.body = File.read(fixtures_dir + '/test.gif')
|
29
|
+
end
|
30
|
+
|
18
31
|
trap('INT') { @server.stop }
|
19
32
|
@server_thread = Thread.new { @server.start }
|
20
33
|
end
|
21
34
|
|
22
|
-
it '
|
35
|
+
it 'works with .parse_http called without any options' do
|
23
36
|
result = FormatParser.parse_http('http://localhost:9399/PNG/anim.png')
|
24
37
|
|
25
38
|
expect(result.format).to eq(:png)
|
26
39
|
expect(result.height_px).to eq(180)
|
27
40
|
end
|
28
41
|
|
29
|
-
it '
|
42
|
+
it 'works with .parse_http called with additional options' do
|
30
43
|
fake_result = double(nature: :audio, format: :aiff)
|
31
44
|
expect_any_instance_of(FormatParser::AIFFParser).to receive(:call).and_return(fake_result)
|
32
45
|
results = FormatParser.parse_http('http://localhost:9399/PNG/anim.png', results: :all)
|
@@ -35,6 +48,18 @@ describe 'Fetching data from HTTP remotes' do
|
|
35
48
|
expect(results).to include(fake_result)
|
36
49
|
end
|
37
50
|
|
51
|
+
it 'is able to cope with a 0-size resource which does not provide Content-Range' do
|
52
|
+
file_information = FormatParser.parse_http('http://localhost:9399/empty')
|
53
|
+
|
54
|
+
expect(file_information).to be_nil
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'is able to cope with a tiny resource which fits into the first requested range completely' do
|
58
|
+
file_information = FormatParser.parse_http('http://localhost:9399/tiny')
|
59
|
+
expect(file_information).not_to be_nil
|
60
|
+
expect(file_information.nature).to eq(:image)
|
61
|
+
end
|
62
|
+
|
38
63
|
it 'parses the animated PNG over HTTP' do
|
39
64
|
file_information = FormatParser.parse_http('http://localhost:9399/PNG/anim.png')
|
40
65
|
expect(file_information).not_to be_nil
|
@@ -91,6 +116,32 @@ describe 'Fetching data from HTTP remotes' do
|
|
91
116
|
end
|
92
117
|
end
|
93
118
|
|
119
|
+
context 'when the server responds with a redirect' do
|
120
|
+
it 'follows the redirect' do
|
121
|
+
file_information = FormatParser.parse_http('http://localhost:9399/redirect/TIFF/test.tif')
|
122
|
+
expect(file_information.format).to eq(:tif)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
it 'sends provided HTTP headers in the request' do
|
127
|
+
# Faraday is required only after calling .parse_http
|
128
|
+
# This line is just to trigger this require, then it's possible to
|
129
|
+
# add an expectation of how Faraday is initialized after.
|
130
|
+
FormatParser.parse_http('invalid_url') rescue nil
|
131
|
+
|
132
|
+
expect(Faraday)
|
133
|
+
.to receive(:new)
|
134
|
+
.with(headers: {'test-header' => 'test-value'})
|
135
|
+
.and_call_original
|
136
|
+
|
137
|
+
file_information = FormatParser.parse_http(
|
138
|
+
'http://localhost:9399//TIFF/test.tif',
|
139
|
+
headers: {'test-header' => 'test-value'}
|
140
|
+
)
|
141
|
+
|
142
|
+
expect(file_information.format).to eq(:tif)
|
143
|
+
end
|
144
|
+
|
94
145
|
after(:all) do
|
95
146
|
@server.stop
|
96
147
|
@server_thread.join(0.5)
|
data/spec/remote_io_spec.rb
CHANGED
@@ -7,7 +7,9 @@ describe FormatParser::RemoteIO do
|
|
7
7
|
rio = described_class.new('https://images.invalid/img.jpg')
|
8
8
|
|
9
9
|
fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 206, body: 'This is the response')
|
10
|
-
|
10
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
11
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
12
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
|
11
13
|
|
12
14
|
rio.seek(10)
|
13
15
|
read_result = rio.read(100)
|
@@ -18,7 +20,9 @@ describe FormatParser::RemoteIO do
|
|
18
20
|
rio = described_class.new('https://images.invalid/img.jpg')
|
19
21
|
|
20
22
|
fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 200, body: 'This is the response')
|
21
|
-
|
23
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
24
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
25
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
|
22
26
|
|
23
27
|
rio.seek(10)
|
24
28
|
read_result = rio.read(100)
|
@@ -29,7 +33,9 @@ describe FormatParser::RemoteIO do
|
|
29
33
|
rio = described_class.new('https://images.invalid/img.jpg')
|
30
34
|
|
31
35
|
fake_resp = double(headers: {}, status: 403, body: 'Please log in')
|
32
|
-
|
36
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
37
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
38
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
33
39
|
|
34
40
|
rio.seek(100)
|
35
41
|
expect { rio.read(100) }.to raise_error(/replied with a 403 and refused/)
|
@@ -39,7 +45,9 @@ describe FormatParser::RemoteIO do
|
|
39
45
|
rio = described_class.new('https://images.invalid/img.jpg')
|
40
46
|
|
41
47
|
fake_resp = double(headers: {}, status: 416, body: 'You stepped off the ledge of the range')
|
42
|
-
|
48
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
49
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
50
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
43
51
|
|
44
52
|
rio.seek(100)
|
45
53
|
expect(rio.read(100)).to be_nil
|
@@ -49,7 +57,9 @@ describe FormatParser::RemoteIO do
|
|
49
57
|
rio = described_class.new('https://images.invalid/img.jpg')
|
50
58
|
|
51
59
|
fake_resp = double(headers: {}, status: 403, body: 'Please log in')
|
52
|
-
|
60
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
61
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
62
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
53
63
|
|
54
64
|
rio.seek(100)
|
55
65
|
# rubocop: disable Lint/AmbiguousBlockAssociation
|
@@ -60,7 +70,9 @@ describe FormatParser::RemoteIO do
|
|
60
70
|
rio = described_class.new('https://images.invalid/img.jpg')
|
61
71
|
|
62
72
|
fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
63
|
-
|
73
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
74
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
75
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
64
76
|
|
65
77
|
rio.seek(100)
|
66
78
|
expect(rio.read(100)).to be_nil
|
@@ -69,15 +81,24 @@ describe FormatParser::RemoteIO do
|
|
69
81
|
it 'does not overwrite size when the range cannot be satisfied and the response is 416' do
|
70
82
|
rio = described_class.new('https://images.invalid/img.jpg')
|
71
83
|
|
72
|
-
|
73
|
-
|
84
|
+
fake_resp1 = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
|
85
|
+
fake_resp2 = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
86
|
+
|
87
|
+
faraday_conn = instance_double(Faraday::Connection)
|
88
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
89
|
+
expect(faraday_conn).to receive(:get)
|
90
|
+
.with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
|
91
|
+
.ordered
|
92
|
+
.and_return(fake_resp1)
|
93
|
+
expect(faraday_conn).to receive(:get)
|
94
|
+
.with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
95
|
+
.ordered
|
96
|
+
.and_return(fake_resp2)
|
97
|
+
|
74
98
|
rio.read(1)
|
75
99
|
|
76
100
|
expect(rio.size).to eq(13)
|
77
101
|
|
78
|
-
fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
|
79
|
-
expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
|
80
|
-
|
81
102
|
rio.seek(100)
|
82
103
|
expect(rio.read(100)).to be_nil
|
83
104
|
|
@@ -88,7 +109,9 @@ describe FormatParser::RemoteIO do
|
|
88
109
|
rio = described_class.new('https://images.invalid/img.jpg')
|
89
110
|
|
90
111
|
fake_resp = double(headers: {}, status: 502, body: 'Guru meditation')
|
91
|
-
|
112
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
113
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
114
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
92
115
|
|
93
116
|
rio.seek(100)
|
94
117
|
expect { rio.read(100) }.to raise_error(/replied with a 502 and we might want to retry/)
|
@@ -100,7 +123,9 @@ describe FormatParser::RemoteIO do
|
|
100
123
|
expect(rio.pos).to eq(0)
|
101
124
|
|
102
125
|
fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
|
103
|
-
|
126
|
+
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
127
|
+
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
128
|
+
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
|
104
129
|
rio.read(1)
|
105
130
|
|
106
131
|
expect(rio.pos).to eq(1)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.26.0
|
4
|
+
version: 0.29.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-09-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -73,6 +73,20 @@ dependencies:
|
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0.13'
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: faraday_middleware
|
78
|
+
requirement: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.14'
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.14'
|
76
90
|
- !ruby/object:Gem::Dependency
|
77
91
|
name: measurometer
|
78
92
|
requirement: !ruby/object:Gem::Requirement
|
@@ -183,10 +197,10 @@ executables:
|
|
183
197
|
extensions: []
|
184
198
|
extra_rdoc_files: []
|
185
199
|
files:
|
200
|
+
- ".github/workflows/main.yml"
|
186
201
|
- ".gitignore"
|
187
202
|
- ".rspec"
|
188
203
|
- ".rubocop.yml"
|
189
|
-
- ".travis.yml"
|
190
204
|
- CHANGELOG.md
|
191
205
|
- CODE_OF_CONDUCT.md
|
192
206
|
- CONTRIBUTING.md
|