format_parser 0.26.0 → 0.29.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +104 -0
  3. data/CHANGELOG.md +12 -0
  4. data/format_parser.gemspec +1 -0
  5. data/lib/archive.rb +3 -0
  6. data/lib/audio.rb +3 -0
  7. data/lib/document.rb +1 -0
  8. data/lib/format_parser/version.rb +1 -1
  9. data/lib/format_parser.rb +3 -2
  10. data/lib/image.rb +3 -0
  11. data/lib/parsers/aiff_parser.rb +4 -1
  12. data/lib/parsers/bmp_parser.rb +3 -0
  13. data/lib/parsers/cr2_parser.rb +2 -0
  14. data/lib/parsers/dpx_parser.rb +6 -0
  15. data/lib/parsers/flac_parser.rb +2 -0
  16. data/lib/parsers/gif_parser.rb +2 -0
  17. data/lib/parsers/jpeg_parser.rb +2 -0
  18. data/lib/parsers/m3u_parser.rb +3 -1
  19. data/lib/parsers/moov_parser.rb +10 -1
  20. data/lib/parsers/mp3_parser.rb +3 -2
  21. data/lib/parsers/ogg_parser.rb +3 -2
  22. data/lib/parsers/pdf_parser.rb +2 -2
  23. data/lib/parsers/png_parser.rb +2 -0
  24. data/lib/parsers/psd_parser.rb +2 -0
  25. data/lib/parsers/tiff_parser.rb +10 -2
  26. data/lib/parsers/wav_parser.rb +3 -0
  27. data/lib/parsers/zip_parser/office_formats.rb +5 -5
  28. data/lib/parsers/zip_parser.rb +5 -3
  29. data/lib/remote_io.rb +29 -7
  30. data/lib/text.rb +1 -0
  31. data/lib/video.rb +3 -0
  32. data/spec/parsers/aiff_parser_spec.rb +1 -0
  33. data/spec/parsers/bmp_parser_spec.rb +8 -0
  34. data/spec/parsers/cr2_parser_spec.rb +1 -0
  35. data/spec/parsers/dpx_parser_spec.rb +1 -0
  36. data/spec/parsers/flac_parser_spec.rb +1 -0
  37. data/spec/parsers/gif_parser_spec.rb +1 -0
  38. data/spec/parsers/jpeg_parser_spec.rb +1 -0
  39. data/spec/parsers/m3u_parser_spec.rb +1 -0
  40. data/spec/parsers/moov_parser_spec.rb +4 -1
  41. data/spec/parsers/mp3_parser_spec.rb +1 -0
  42. data/spec/parsers/ogg_parser_spec.rb +1 -0
  43. data/spec/parsers/pdf_parser_spec.rb +1 -0
  44. data/spec/parsers/png_parser_spec.rb +1 -0
  45. data/spec/parsers/psd_parser_spec.rb +1 -0
  46. data/spec/parsers/tiff_parser_spec.rb +1 -0
  47. data/spec/parsers/wav_parser_spec.rb +1 -0
  48. data/spec/parsers/zip_parser_spec.rb +2 -0
  49. data/spec/remote_fetching_spec.rb +53 -2
  50. data/spec/remote_io_spec.rb +38 -13
  51. metadata +17 -3
  52. data/.travis.yml +0 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1074a8172a2830a11df0fb7874936c2b8abab8d74fd39985f9c9f7b72d5b348c
4
- data.tar.gz: 5eafd2d610cd30bc85a056bd1c31331bcb49e8ce6b5b538cd13e280b138be7db
3
+ metadata.gz: 75ee83f55840e3031d4d60d8dc07ca038812188613e2b740079e1c965efb2886
4
+ data.tar.gz: 31c3ee84434560c18e6ea74a23160b909e6f880f52b2ed6f0e888e847c557bd9
5
5
  SHA512:
6
- metadata.gz: 5ce396a71fedd82b8041bcb6c833e559c7eef74886e73095eaf0b3d21e0c0d49b1620a83aba9796a8134dd5a7fc679156cd967cf82cba95b5941941be73d70c4
7
- data.tar.gz: '0395e5a8fb35e860060e9c3b040b788aaad97eb5883f2d662b418156f1f3986bc4a26a814b9a8552ce7dcbd271da27e977cfb77a5e5b155ebd35db6a49a97719'
6
+ metadata.gz: 536cfb1bac7926f56ba760959d7c5a0905d3b2b0944b16248b6c81be00b722dad894a8d6d5773134fb0471e42d016a20be15e3d5a01c671f0cc65658f2fc05b4
7
+ data.tar.gz: cb3f73df051b8612cb6d0e1a4e55c045e461bf2c5e4667dd6e461e779b1f39d01be8d70d084e252e5198a966ac590129286811f5822808a0c980c2ad72a087a1
@@ -0,0 +1,104 @@
1
+ name: CI
2
+
3
+ on: [push,pull_request]
4
+
5
+ env:
6
+ BUNDLE_PATH: vendor/bundle
7
+
8
+ jobs:
9
+ lint:
10
+ name: Code Style
11
+ runs-on: ubuntu-18.04
12
+ if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
13
+ strategy:
14
+ matrix:
15
+ ruby:
16
+ - 2.7
17
+ - 2.6
18
+ - 2.5
19
+ - 2.4
20
+ - 2.3
21
+ - 2.2
22
+ - jruby
23
+ steps:
24
+ - name: Checkout
25
+ uses: actions/checkout@v2
26
+ - name: Setup Ruby
27
+ uses: ruby/setup-ruby@v1
28
+ with:
29
+ ruby-version: ${{ matrix.ruby }}
30
+ - name: Gemfile Cache
31
+ uses: actions/cache@v2
32
+ with:
33
+ path: Gemfile.lock
34
+ key: ${{ runner.os }}-gemlock-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'format_parser.gemspec') }}
35
+ restore-keys: |
36
+ ${{ runner.os }}-gemlock-${{ matrix.ruby }}-
37
+ - name: Bundle Cache
38
+ id: cache-gems
39
+ uses: actions/cache@v2
40
+ with:
41
+ path: vendor/bundle
42
+ key: ${{ runner.os }}-gems-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'Gemfile.lock', 'format_parser.gemspec') }}
43
+ restore-keys: |
44
+ ${{ runner.os }}-gems-${{ matrix.ruby }}-
45
+ ${{ runner.os }}-gems-
46
+ - name: Bundle Install
47
+ if: steps.cache-gems.outputs.cache-hit != 'true'
48
+ run: bundle install --jobs 4 --retry 3
49
+ - name: Rubocop Cache
50
+ uses: actions/cache@v2
51
+ with:
52
+ path: ~/.cache/rubocop_cache
53
+ key: ${{ runner.os }}-rubocop-${{ hashFiles('.rubocop.yml') }}
54
+ restore-keys: |
55
+ ${{ runner.os }}-rubocop-
56
+ - name: Rubocop
57
+ run: bundle exec rubocop
58
+ test:
59
+ name: Specs
60
+ runs-on: ubuntu-18.04
61
+ if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
62
+ strategy:
63
+ matrix:
64
+ ruby:
65
+ - 2.7
66
+ - 2.6
67
+ - 2.5
68
+ - 2.4
69
+ - 2.3
70
+ - 2.2
71
+ - jruby
72
+ experimental: [false]
73
+ include:
74
+ - ruby: 3.0
75
+ experimental: true
76
+ steps:
77
+ - name: Checkout
78
+ uses: actions/checkout@v2
79
+ - name: Setup Ruby
80
+ uses: ruby/setup-ruby@v1
81
+ with:
82
+ ruby-version: ${{ matrix.ruby }}
83
+ - name: Gemfile Cache
84
+ uses: actions/cache@v2
85
+ with:
86
+ path: Gemfile.lock
87
+ key: ${{ runner.os }}-gemlock-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'format_parser.gemspec') }}
88
+ restore-keys: |
89
+ ${{ runner.os }}-gemlock-${{ matrix.ruby }}-
90
+ - name: Bundle Cache
91
+ id: cache-gems
92
+ uses: actions/cache@v2
93
+ with:
94
+ path: vendor/bundle
95
+ key: ${{ runner.os }}-gems-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'Gemfile.lock', 'format_parser.gemspec') }}
96
+ restore-keys: |
97
+ ${{ runner.os }}-gems-${{ matrix.ruby }}-
98
+ ${{ runner.os }}-gems-
99
+ - name: Bundle Install
100
+ if: steps.cache-gems.outputs.cache-hit != 'true'
101
+ run: bundle install --jobs 4 --retry 3
102
+ - name: RSpec
103
+ continue-on-error: ${{ matrix.experimental }}
104
+ run: bundle exec rake parallel:spec
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## 0.29.1
2
+ * Fix handling of 200 responses with `parse_http` as well as handling of very small responses which do not need range access
3
+
4
+ ## 0.29.0
5
+ * Add option `headers:` to `FormatParser.parse_http`
6
+
7
+ ## 0.28.0
8
+ * Change `FormatParser.parse_http` to follow HTTP redirects
9
+
10
+ ## 0.27.0
11
+ * Add `#content_type` on `Result` return values which makes sense for the detected filetype
12
+
1
13
  ## 0.26.0
2
14
  * Add support for M3U format files
3
15
 
@@ -34,6 +34,7 @@ Gem::Specification.new do |spec|
34
34
  spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
35
35
  spec.add_dependency 'id3tag', '~> 0.14'
36
36
  spec.add_dependency 'faraday', '~> 0.13'
37
+ spec.add_dependency 'faraday_middleware', '~> 0.14'
37
38
  spec.add_dependency 'measurometer', '~> 1'
38
39
 
39
40
  spec.add_development_dependency 'rspec', '~> 3.0'
data/lib/archive.rb CHANGED
@@ -26,6 +26,9 @@ module FormatParser
26
26
  # it can be placed here
27
27
  attr_accessor :intrinsics
28
28
 
29
+ # The MIME type of the archive
30
+ attr_accessor :content_type
31
+
29
32
  # Only permits assignments via defined accessors
30
33
  def initialize(**attributes)
31
34
  attributes.map { |(k, v)| public_send("#{k}=", v) }
data/lib/audio.rb CHANGED
@@ -35,6 +35,9 @@ module FormatParser
35
35
  # it can be placed here
36
36
  attr_accessor :intrinsics
37
37
 
38
+ # The MIME type of the sound file
39
+ attr_accessor :content_type
40
+
38
41
  # Only permits assignments via defined accessors
39
42
  def initialize(**attributes)
40
43
  attributes.map { |(k, v)| public_send("#{k}=", v) }
data/lib/document.rb CHANGED
@@ -7,6 +7,7 @@ module FormatParser
7
7
  attr_accessor :format
8
8
  attr_accessor :document_type
9
9
  attr_accessor :page_count
10
+ attr_accessor :content_type
10
11
 
11
12
  # Only permits assignments via defined accessors
12
13
  def initialize(**attributes)
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.26.0'
2
+ VERSION = '0.29.1'
3
3
  end
data/lib/format_parser.rb CHANGED
@@ -88,13 +88,14 @@ module FormatParser
88
88
  # given to `.parse`. The accepted keyword arguments are the same as the ones for `parse`.
89
89
  #
90
90
  # @param url[String, URI] the HTTP(S) URL to request the object from using Faraday and `Range:` requests
91
+ # @param headers[Hash] (optional) the HTTP headers to request the object from using Faraday
91
92
  # @param kwargs the keyword arguments to be delegated to `.parse`
92
93
  # @see {.parse}
93
- def self.parse_http(url, **kwargs)
94
+ def self.parse_http(url, headers: {}, **kwargs)
94
95
  # Do not extract the filename, since the URL
95
96
  # can really be "anything". But if the caller
96
97
  # provides filename_hint it will be carried over
97
- parse(RemoteIO.new(url), **kwargs)
98
+ parse(RemoteIO.new(url, headers: headers), **kwargs)
98
99
  end
99
100
 
100
101
  # Parses the file at the given `path` and returns the results as if it were any IO
data/lib/image.rb CHANGED
@@ -64,6 +64,9 @@ module FormatParser
64
64
  # it can be placed here
65
65
  attr_accessor :intrinsics
66
66
 
67
+ # The MIME type of the image file
68
+ attr_accessor :content_type
69
+
67
70
  # Only permits assignments via defined accessors
68
71
  def initialize(**attributes)
69
72
  attributes.map { |(k, v)| public_send("#{k}=", v) }
@@ -1,6 +1,8 @@
1
1
  class FormatParser::AIFFParser
2
2
  include FormatParser::IOUtils
3
3
 
4
+ AIFF_MIME_TYPE = 'audio/x-aiff'
5
+
4
6
  # Known chunk types we can omit when parsing,
5
7
  # grossly lifted from http://www.muratnkonar.com/aiff/
6
8
  KNOWN_CHUNKS = [
@@ -70,7 +72,8 @@ class FormatParser::AIFFParser
70
72
  num_audio_channels: channels,
71
73
  audio_sample_rate_hz: sample_rate.to_i,
72
74
  media_duration_frames: sample_frames,
73
- media_duration_seconds: duration_in_seconds
75
+ media_duration_seconds: duration_in_seconds,
76
+ content_type: AIFF_MIME_TYPE,
74
77
  )
75
78
  end
76
79
 
@@ -5,6 +5,7 @@ class FormatParser::BMPParser
5
5
 
6
6
  VALID_BMP = 'BM'
7
7
  PERMISSIBLE_PIXEL_ARRAY_LOCATIONS = 26..512
8
+ BMP_MIME_TYPE = 'image/bmp'
8
9
 
9
10
  def likely_match?(filename)
10
11
  filename =~ /\.bmp$/i
@@ -42,6 +43,7 @@ class FormatParser::BMPParser
42
43
  width_px: width,
43
44
  height_px: height,
44
45
  color_mode: :rgb,
46
+ content_type: BMP_MIME_TYPE,
45
47
  intrinsics: {
46
48
  data_order: data_order,
47
49
  bits_per_pixel: bit_depth
@@ -63,6 +65,7 @@ class FormatParser::BMPParser
63
65
  width_px: width,
64
66
  height_px: height.abs,
65
67
  color_mode: :rgb,
68
+ content_type: BMP_MIME_TYPE,
66
69
  intrinsics: {
67
70
  vertical_resolution: vertical_res,
68
71
  horizontal_resolution: horizontal_res,
@@ -6,6 +6,7 @@ class FormatParser::CR2Parser
6
6
 
7
7
  TIFF_HEADER = [0x49, 0x49, 0x2a, 0x00]
8
8
  CR2_HEADER = [0x43, 0x52, 0x02, 0x00]
9
+ CR2_MIME_TYPE = 'image/x-canon-cr2'
9
10
 
10
11
  def likely_match?(filename)
11
12
  filename =~ /\.cr2$/i
@@ -39,6 +40,7 @@ class FormatParser::CR2Parser
39
40
  display_height_px: exif_data.rotated? ? w : h,
40
41
  orientation: exif_data.orientation_sym,
41
42
  intrinsics: {exif: exif_data},
43
+ content_type: CR2_MIME_TYPE,
42
44
  )
43
45
  rescue EXIFR::MalformedTIFF
44
46
  nil
@@ -6,6 +6,11 @@ class FormatParser::DPXParser
6
6
  BE_MAGIC = 'SDPX'
7
7
  LE_MAGIC = BE_MAGIC.reverse
8
8
 
9
+ # There is no official MIME type for DPX, so we have
10
+ # to invent something useful. We will prefix it with x-
11
+ # to indicate that it is a vendor subtype
12
+ DPX_MIME_TYPE = 'image/x-dpx'
13
+
9
14
  class ByteOrderHintIO < SimpleDelegator
10
15
  def initialize(io, is_little_endian)
11
16
  super(io)
@@ -61,6 +66,7 @@ class FormatParser::DPXParser
61
66
  display_width_px: display_w,
62
67
  display_height_px: display_h,
63
68
  intrinsics: dpx_structure,
69
+ content_type: DPX_MIME_TYPE,
64
70
  )
65
71
  end
66
72
 
@@ -4,6 +4,7 @@ class FormatParser::FLACParser
4
4
  MAGIC_BYTES = 4
5
5
  MAGIC_BYTE_STRING = 'fLaC'
6
6
  BLOCK_HEADER_BYTES = 4
7
+ FLAC_MIME_TYPE = 'audio/x-flac'
7
8
 
8
9
  def likely_match?(filename)
9
10
  filename =~ /\.flac$/i
@@ -61,6 +62,7 @@ class FormatParser::FLACParser
61
62
  audio_sample_rate_hz: sample_rate,
62
63
  media_duration_seconds: duration,
63
64
  media_duration_frames: total_samples,
65
+ content_type: FLAC_MIME_TYPE,
64
66
  intrinsics: {
65
67
  bits_per_sample: bits_per_sample,
66
68
  minimum_frame_size: minimum_frame_size,
@@ -3,6 +3,7 @@ class FormatParser::GIFParser
3
3
 
4
4
  HEADERS = ['GIF87a', 'GIF89a'].map(&:b)
5
5
  NETSCAPE_AND_AUTHENTICATION_CODE = 'NETSCAPE2.0'
6
+ GIF_MIME_TYPE = 'image/gif'
6
7
 
7
8
  def likely_match?(filename)
8
9
  filename =~ /\.gif$/i
@@ -45,6 +46,7 @@ class FormatParser::GIFParser
45
46
  height_px: h,
46
47
  has_multiple_frames: is_animated,
47
48
  color_mode: :indexed,
49
+ content_type: GIF_MIME_TYPE
48
50
  )
49
51
  end
50
52
 
@@ -12,6 +12,7 @@ class FormatParser::JPEGParser
12
12
  APP1_MARKER = 0xE1 # maybe EXIF
13
13
  EXIF_MAGIC_STRING = "Exif\0\0".b
14
14
  MUST_FIND_NEXT_MARKER_WITHIN_BYTES = 1024
15
+ JPEG_MIME_TYPE = 'image/jpeg'
15
16
 
16
17
  def self.likely_match?(filename)
17
18
  filename =~ /\.jpe?g$/i
@@ -88,6 +89,7 @@ class FormatParser::JPEGParser
88
89
  display_height_px: dh,
89
90
  orientation: flat_exif.orientation_sym,
90
91
  intrinsics: {exif: flat_exif},
92
+ content_type: JPEG_MIME_TYPE
91
93
  )
92
94
 
93
95
  return result
@@ -2,6 +2,7 @@ class FormatParser::M3UParser
2
2
  include FormatParser::IOUtils
3
3
 
4
4
  HEADER = '#EXTM3U'
5
+ M3U8_MIME_TYPE = 'application/vnd.apple.mpegurl' # https://en.wikipedia.org/wiki/M3U#Internet_media_types
5
6
 
6
7
  def likely_match?(filename)
7
8
  filename =~ /\.m3u8?$/i
@@ -14,7 +15,8 @@ class FormatParser::M3UParser
14
15
  return unless HEADER.eql?(header)
15
16
 
16
17
  FormatParser::Text.new(
17
- format: :m3u
18
+ format: :m3u,
19
+ content_type: M3U8_MIME_TYPE,
18
20
  )
19
21
  end
20
22
  FormatParser.register_parser new, natures: :text, formats: :m3u
@@ -11,6 +11,12 @@ class FormatParser::MOOVParser
11
11
  'm4a ' => :m4a,
12
12
  }
13
13
 
14
+ # https://tools.ietf.org/html/rfc4337#section-2
15
+ # There is also video/quicktime which we should be able to capture
16
+ # here, but there is currently no detection for MOVs versus MP4s
17
+ MP4_AU_MIME_TYPE = 'audio/mp4'
18
+ MP4_MIXED_MIME_TYPE = 'video/mp4'
19
+
14
20
  def likely_match?(filename)
15
21
  filename =~ /\.(mov|m4a|ma4|mp4|aac|m4v)$/i
16
22
  end
@@ -49,10 +55,12 @@ class FormatParser::MOOVParser
49
55
  end
50
56
 
51
57
  # M4A only contains audio, while MP4 and friends can contain video.
52
- if format_from_moov_type(file_type) == :m4a
58
+ fmt = format_from_moov_type(file_type)
59
+ if fmt == :m4a
53
60
  FormatParser::Audio.new(
54
61
  format: format_from_moov_type(file_type),
55
62
  media_duration_seconds: media_duration_s,
63
+ content_type: MP4_AU_MIME_TYPE,
56
64
  intrinsics: atom_tree,
57
65
  )
58
66
  else
@@ -61,6 +69,7 @@ class FormatParser::MOOVParser
61
69
  width_px: width,
62
70
  height_px: height,
63
71
  media_duration_seconds: media_duration_s,
72
+ content_type: MP4_MIXED_MIME_TYPE,
64
73
  intrinsics: atom_tree,
65
74
  )
66
75
  end
@@ -32,7 +32,7 @@ class FormatParser::MP3Parser
32
32
  MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
33
33
  MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
34
34
  TIFF_HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
35
-
35
+ MP3_MIME_TYPE = 'audio/mpeg'
36
36
  # Wraps the Tag object returned by ID3Tag in such
37
37
  # a way that a usable JSON representation gets
38
38
  # returned
@@ -104,7 +104,8 @@ class FormatParser::MP3Parser
104
104
  # do not tell anything of substance
105
105
  num_audio_channels: first_frame.channels,
106
106
  audio_sample_rate_hz: first_frame.sample_rate,
107
- intrinsics: id3tags_hash.merge(id3tags: tags)
107
+ intrinsics: id3tags_hash.merge(id3tags: tags),
108
+ content_type: MP3_MIME_TYPE,
108
109
  )
109
110
 
110
111
  extra_file_attirbutes = fetch_extra_attributes_from_id3_tags(id3tags_hash)
@@ -3,8 +3,8 @@
3
3
  class FormatParser::OggParser
4
4
  include FormatParser::IOUtils
5
5
 
6
- # Maximum size of an Ogg page
7
6
  MAX_POSSIBLE_PAGE_SIZE = 65307
7
+ OGG_MIME_TYPE = 'audio/ogg'
8
8
 
9
9
  def likely_match?(filename)
10
10
  filename =~ /\.ogg$/i
@@ -45,7 +45,8 @@ class FormatParser::OggParser
45
45
  format: :ogg,
46
46
  audio_sample_rate_hz: sample_rate,
47
47
  num_audio_channels: channels,
48
- media_duration_seconds: duration
48
+ media_duration_seconds: duration,
49
+ content_type: OGG_MIME_TYPE,
49
50
  )
50
51
  end
51
52
 
@@ -1,6 +1,5 @@
1
1
  class FormatParser::PDFParser
2
2
  include FormatParser::IOUtils
3
-
4
3
  # First 9 bytes of a PDF should be in this format, according to:
5
4
  #
6
5
  # https://stackoverflow.com/questions/3108201/detect-if-pdf-file-is-correct-header-pdf
@@ -8,6 +7,7 @@ class FormatParser::PDFParser
8
7
  # There are however exceptions, which are left out for now.
9
8
  #
10
9
  PDF_MARKER = /%PDF-1\.[0-8]{1}/
10
+ PDF_CONTENT_TYPE = 'application/pdf'
11
11
 
12
12
  def likely_match?(filename)
13
13
  filename =~ /\.(pdf|ai)$/i
@@ -18,7 +18,7 @@ class FormatParser::PDFParser
18
18
 
19
19
  return unless safe_read(io, 9) =~ PDF_MARKER
20
20
 
21
- FormatParser::Document.new(format: :pdf)
21
+ FormatParser::Document.new(format: :pdf, content_type: PDF_CONTENT_TYPE)
22
22
  end
23
23
 
24
24
  FormatParser.register_parser new, natures: :document, formats: :pdf, priority: 1
@@ -14,6 +14,7 @@ class FormatParser::PNGParser
14
14
  4 => true, # Grayscale with alpha
15
15
  6 => true,
16
16
  }
17
+ PNG_MIME_TYPE = 'image/png'
17
18
 
18
19
  def likely_match?(filename)
19
20
  filename =~ /\.png$/i
@@ -67,6 +68,7 @@ class FormatParser::PNGParser
67
68
  color_mode: color_mode,
68
69
  has_multiple_frames: has_animation,
69
70
  num_animation_or_video_frames: num_frames,
71
+ content_type: PNG_MIME_TYPE,
70
72
  )
71
73
  end
72
74
 
@@ -2,6 +2,7 @@ class FormatParser::PSDParser
2
2
  include FormatParser::IOUtils
3
3
 
4
4
  PSD_HEADER = [0x38, 0x42, 0x50, 0x53]
5
+ PSD_MIME_TYPE = 'application/x-photoshop'
5
6
 
6
7
  def likely_match?(filename)
7
8
  filename =~ /\.psd$/i # Maybe also PSB at some point
@@ -20,6 +21,7 @@ class FormatParser::PSDParser
20
21
  format: :psd,
21
22
  width_px: w,
22
23
  height_px: h,
24
+ content_type: PSD_MIME_TYPE,
23
25
  )
24
26
  end
25
27
 
@@ -5,6 +5,8 @@ class FormatParser::TIFFParser
5
5
  MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
6
6
  MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
7
7
  HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
8
+ TIFF_MIME_TYPE = 'image/tiff'
9
+ ARW_MIME_TYPE = 'image/x-sony-arw'
8
10
 
9
11
  def likely_match?(filename)
10
12
  filename =~ /\.tiff?$/i
@@ -14,7 +16,10 @@ class FormatParser::TIFFParser
14
16
  io = FormatParser::IOConstraint.new(io)
15
17
 
16
18
  return unless HEADER_BYTES.include?(safe_read(io, 4))
17
- io.seek(io.pos + 2) # Skip over the offset of the IFD, EXIFR will re-read it anyway
19
+
20
+ # Skip over the offset of the IFD,
21
+ # EXIFR will re-read it anyway
22
+ io.seek(io.pos + 2)
18
23
  return if cr2?(io)
19
24
 
20
25
  # The TIFF scanner in EXIFR is plenty good enough,
@@ -26,14 +31,17 @@ class FormatParser::TIFFParser
26
31
  w = exif_data.width || exif_data.pixel_x_dimension
27
32
  h = exif_data.height || exif_data.pixel_y_dimension
28
33
 
34
+ format = arw?(exif_data) ? :arw : :tif
35
+ mime_type = arw?(exif_data) ? ARW_MIME_TYPE : TIFF_MIME_TYPE
29
36
  FormatParser::Image.new(
30
- format: arw?(exif_data) ? :arw : :tif, # Specify format as arw for Sony ARW format images, else tif
37
+ format: format,
31
38
  width_px: w,
32
39
  height_px: h,
33
40
  display_width_px: exif_data.rotated? ? h : w,
34
41
  display_height_px: exif_data.rotated? ? w : h,
35
42
  orientation: exif_data.orientation_sym,
36
43
  intrinsics: {exif: exif_data},
44
+ content_type: mime_type,
37
45
  )
38
46
  rescue EXIFR::MalformedTIFF
39
47
  nil
@@ -1,6 +1,8 @@
1
1
  class FormatParser::WAVParser
2
2
  include FormatParser::IOUtils
3
3
 
4
+ WAV_MIME_TYPE = 'audio/x-wav'
5
+
4
6
  def likely_match?(filename)
5
7
  filename =~ /\.wav$/i
6
8
  end
@@ -96,6 +98,7 @@ class FormatParser::WAVParser
96
98
  audio_sample_rate_hz: fmt_data[:sample_rate],
97
99
  media_duration_frames: sample_frames,
98
100
  media_duration_seconds: duration_in_seconds,
101
+ content_type: WAV_MIME_TYPE,
99
102
  )
100
103
  end
101
104
 
@@ -37,15 +37,15 @@ module FormatParser::ZIPParser::OfficeFormats
37
37
  OFFICE_MARKER_FILES.subset?(filenames_set)
38
38
  end
39
39
 
40
- def office_file_format_from_entry_set(filenames_set)
40
+ def office_file_format_and_mime_type_from_entry_set(filenames_set)
41
41
  if filenames_set.include?('word/document.xml')
42
- :docx
42
+ [:docx, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document']
43
43
  elsif filenames_set.include?('xl/workbook.xml')
44
- :xlsx
44
+ [:xlsx, 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet']
45
45
  elsif filenames_set.include?('ppt/presentation.xml')
46
- :pptx
46
+ [:pptx, 'application/vnd.openxmlformats-officedocument.presentationml.presentation']
47
47
  else
48
- :unknown
48
+ [:unknown, 'application/zip']
49
49
  end
50
50
  end
51
51
  end
@@ -5,6 +5,8 @@ class FormatParser::ZIPParser
5
5
  include OfficeFormats
6
6
  include FormatParser::IOUtils
7
7
 
8
+ ZIP_MIME_TYPE = 'application/zip'
9
+
8
10
  def likely_match?(filename)
9
11
  filename =~ /\.(zip|docx|keynote|numbers|pptx|xlsx)$/i
10
12
  end
@@ -25,10 +27,10 @@ class FormatParser::ZIPParser
25
27
  end
26
28
 
27
29
  if office_document?(filenames_set)
28
- office_format = office_file_format_from_entry_set(filenames_set)
29
- FormatParser::Archive.new(nature: :document, format: office_format, entries: entries_archive)
30
+ office_format, mime_type = office_file_format_and_mime_type_from_entry_set(filenames_set)
31
+ FormatParser::Archive.new(nature: :document, format: office_format, entries: entries_archive, content_type: mime_type)
30
32
  else
31
- FormatParser::Archive.new(nature: :archive, format: :zip, entries: entries_archive)
33
+ FormatParser::Archive.new(nature: :archive, format: :zip, entries: entries_archive, content_type: ZIP_MIME_TYPE)
32
34
  end
33
35
  rescue FileReader::Error
34
36
  # This is not a ZIP, or a broken ZIP.
data/lib/remote_io.rb CHANGED
@@ -24,8 +24,11 @@ class FormatParser::RemoteIO
24
24
  end
25
25
 
26
26
  # @param uri[URI, String] the remote URL to obtain
27
- def initialize(uri)
27
+ # @param headers[Hash] (optional) the HTTP headers to be used in the HTTP request
28
+ def initialize(uri, headers: {})
28
29
  require 'faraday'
30
+ require 'faraday_middleware/response/follow_redirects'
31
+ @headers = headers
29
32
  @uri = uri
30
33
  @pos = 0
31
34
  @remote_size = false
@@ -78,21 +81,40 @@ class FormatParser::RemoteIO
78
81
  # We use a GET and not a HEAD request followed by a GET because
79
82
  # S3 does not allow HEAD requests if you only presigned your URL for GETs, so we
80
83
  # combine the first GET of a segment and retrieving the size of the resource
81
- response = Faraday.get(@uri, nil, range: 'bytes=%d-%d' % [range.begin, range.end])
84
+ conn = Faraday.new(headers: @headers) do |faraday|
85
+ faraday.use FaradayMiddleware::FollowRedirects
86
+ # we still need the default adapter, more details: https://blog.thecodewhisperer.com/permalink/losing-time-to-faraday
87
+ faraday.adapter Faraday.default_adapter
88
+ end
89
+ response = conn.get(@uri, nil, range: 'bytes=%d-%d' % [range.begin, range.end])
82
90
 
83
91
  case response.status
84
- when 200, 206
92
+ when 200
93
+ # S3 returns 200 when you request a Range that is fully satisfied by the entire object,
94
+ # we take that into account here. Also, for very tiny responses (and also for empty responses)
95
+ # the responses are going to be 200 which does not mean we cannot proceed
96
+ # To have a good check for both of these conditions we need to know whether the ranges overlap fully
97
+ response_size = response.body.bytesize
98
+ requested_range_size = range.end - range.begin + 1
99
+ if response_size > requested_range_size
100
+ error_message = [
101
+ "We requested #{requested_range_size} bytes, but the server sent us more",
102
+ "(#{response_size} bytes) - it likely has no `Range:` support.",
103
+ "The error occurred when talking to #{@uri})"
104
+ ]
105
+ raise InvalidRequest.new(response.status, error_message.join("\n"))
106
+ end
107
+ [response_size, response.body]
108
+ when 206
85
109
  # Figure out if the server supports content ranges; if it doesn't, we have no
86
110
  # business working with that server
87
111
  range_header = response.headers['Content-Range']
88
- raise InvalidRequest.new(response.status, "No range support at #{@uri}") unless range_header
112
+ raise InvalidRequest.new(response.status, "The server replied with 206 status but no Content-Range at #{@uri}") unless range_header
89
113
 
90
114
  # "Content-Range: bytes 0-0/307404381" is how the response header is structured
91
115
  size = range_header[/\/(\d+)$/, 1].to_i
92
116
 
93
- # S3 returns 200 when you request a Range that is fully satisfied by the entire object,
94
- # we take that into account here. For other servers, 206 is the expected response code.
95
- # Also, if we request a _larger_ range than what can be satisfied by the server,
117
+ # If we request a _larger_ range than what can be satisfied by the server,
96
118
  # the response is going to only contain what _can_ be sent and the status is also going
97
119
  # to be 206
98
120
  return [size, response.body]
data/lib/text.rb CHANGED
@@ -5,6 +5,7 @@ module FormatParser
5
5
  NATURE = :text
6
6
 
7
7
  attr_accessor :format
8
+ attr_accessor :content_type
8
9
 
9
10
  # Only permits assignments via defined accessors
10
11
  def initialize(**attributes)
data/lib/video.rb CHANGED
@@ -23,6 +23,9 @@ module FormatParser
23
23
  # it can be placed here
24
24
  attr_accessor :intrinsics
25
25
 
26
+ # The MIME type of the video
27
+ attr_accessor :content_type
28
+
26
29
  # Only permits assignments via defined accessors
27
30
  def initialize(**attributes)
28
31
  attributes.map { |(k, v)| public_send("#{k}=", v) }
@@ -10,6 +10,7 @@ describe FormatParser::AIFFParser do
10
10
  expect(parse_result.num_audio_channels).to eq(2)
11
11
  expect(parse_result.audio_sample_rate_hz).to be_within(0.01).of(44100)
12
12
  expect(parse_result.media_duration_seconds).to be_within(0.01).of(1.05)
13
+ expect(parse_result.content_type).to eq('audio/x-aiff')
13
14
  end
14
15
 
15
16
  it 'parses a Logic Pro created AIFF sample file having a COMT chunk before a COMM chunk' do
@@ -13,6 +13,8 @@ describe FormatParser::BMPParser do
13
13
  expect(parsed.width_px).to eq(40)
14
14
  expect(parsed.height_px).to eq(27)
15
15
 
16
+ expect(parsed.content_type).to eq('image/bmp')
17
+
16
18
  expect(parsed.intrinsics).not_to be_nil
17
19
  expect(parsed.intrinsics[:vertical_resolution]).to eq(2834)
18
20
  expect(parsed.intrinsics[:horizontal_resolution]).to eq(2834)
@@ -32,6 +34,8 @@ describe FormatParser::BMPParser do
32
34
  expect(parsed.width_px).to eq(1920)
33
35
  expect(parsed.height_px).to eq(1080)
34
36
 
37
+ expect(parsed.content_type).to eq('image/bmp')
38
+
35
39
  expect(parsed.intrinsics).not_to be_nil
36
40
  expect(parsed.intrinsics[:vertical_resolution]).to eq(2835)
37
41
  expect(parsed.intrinsics[:horizontal_resolution]).to eq(2835)
@@ -51,6 +55,8 @@ describe FormatParser::BMPParser do
51
55
  expect(parsed.width_px).to eq(200)
52
56
  expect(parsed.height_px).to eq(200)
53
57
 
58
+ expect(parsed.content_type).to eq('image/bmp')
59
+
54
60
  expect(parsed.intrinsics).not_to be_nil
55
61
  end
56
62
 
@@ -64,6 +70,7 @@ describe FormatParser::BMPParser do
64
70
  expect(parsed.color_mode).to eq(:rgb)
65
71
  expect(parsed.width_px).to eq(40)
66
72
  expect(parsed.height_px).to eq(27)
73
+ expect(parsed.content_type).to eq('image/bmp')
67
74
  expect(parsed.intrinsics[:bits_per_pixel]).to eq(24)
68
75
  expect(parsed.intrinsics[:data_order]).to eq(:normal)
69
76
 
@@ -76,6 +83,7 @@ describe FormatParser::BMPParser do
76
83
  expect(parsed.color_mode).to eq(:rgb)
77
84
  expect(parsed.width_px).to eq(40)
78
85
  expect(parsed.height_px).to eq(27)
86
+ expect(parsed.content_type).to eq('image/bmp')
79
87
  expect(parsed.intrinsics[:bits_per_pixel]).to eq(24)
80
88
  expect(parsed.intrinsics[:data_order]).to eq(:normal)
81
89
  end
@@ -17,6 +17,7 @@ describe FormatParser::CR2Parser do
17
17
  expect(parsed.height_px).to be > 0
18
18
 
19
19
  expect(parsed.orientation).not_to be_nil
20
+ expect(parsed.content_type).to eq('image/x-canon-cr2')
20
21
  end
21
22
  end
22
23
  end
@@ -15,6 +15,7 @@ describe FormatParser::DPXParser do
15
15
  expect(parsed.width_px).to be_between(0, 2048)
16
16
  expect(parsed.height_px).to be_kind_of(Integer)
17
17
  expect(parsed.height_px).to be_between(0, 4000)
18
+ expect(parsed.content_type).to eq('image/x-dpx')
18
19
  end
19
20
  end
20
21
 
@@ -14,6 +14,7 @@ describe FormatParser::FLACParser do
14
14
  expect(parsed.intrinsics).not_to be_nil
15
15
  expect(parsed.media_duration_frames).to eq(33810)
16
16
  expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
17
+ expect(parsed.content_type).to eq('audio/x-flac')
17
18
  end
18
19
 
19
20
  it 'decodes and estimates duration for the 16bit FLAC File' do
@@ -17,6 +17,7 @@ describe FormatParser::GIFParser do
17
17
 
18
18
  expect(parsed.height_px).to be_kind_of(Integer)
19
19
  expect(parsed.height_px).to be > 0
20
+ expect(parsed.content_type).to eq('image/gif')
20
21
  end
21
22
  end
22
23
  end
@@ -14,6 +14,7 @@ describe FormatParser::JPEGParser do
14
14
 
15
15
  expect(parsed.height_px).to be_kind_of(Integer)
16
16
  expect(parsed.height_px).to be > 0
17
+ expect(parsed.content_type).to eq('image/jpeg')
17
18
  end
18
19
  end
19
20
  end
@@ -25,6 +25,7 @@ describe FormatParser::M3UParser do
25
25
  expect(parsed_m3u).not_to be_nil
26
26
  expect(parsed_m3u.nature).to eq(:text)
27
27
  expect(parsed_m3u.format).to eq(:m3u)
28
+ expect(parsed_m3u.content_type).to eq('application/vnd.apple.mpegurl')
28
29
  end
29
30
  end
30
31
 
@@ -37,7 +37,7 @@ describe FormatParser::MOOVParser do
37
37
  expect(result.nature).to eq(:audio)
38
38
  expect(result.media_duration_seconds).to be_kind_of(Float)
39
39
  expect(result.media_duration_seconds).to be > 0
40
-
40
+ expect(result.content_type).to be_kind_of(String)
41
41
  expect(result.intrinsics).not_to be_nil
42
42
  end
43
43
  end
@@ -52,6 +52,7 @@ describe FormatParser::MOOVParser do
52
52
  expect(result.height_px).to be > 0
53
53
  expect(result.media_duration_seconds).to be_kind_of(Float)
54
54
  expect(result.media_duration_seconds).to be > 0
55
+ expect(result.content_type).to eq('video/mp4')
55
56
 
56
57
  expect(result.intrinsics).not_to be_nil
57
58
  end
@@ -67,6 +68,7 @@ describe FormatParser::MOOVParser do
67
68
  expect(result.height_px).to be > 0
68
69
  expect(result.media_duration_seconds).to be_kind_of(Float)
69
70
  expect(result.media_duration_seconds).to be > 0
71
+ expect(result.content_type).to eq('video/mp4')
70
72
 
71
73
  expect(result.intrinsics).not_to be_nil
72
74
  end
@@ -79,6 +81,7 @@ describe FormatParser::MOOVParser do
79
81
  expect(result).not_to be_nil
80
82
  expect(result.nature).to eq(:audio)
81
83
  expect(result.format).to eq(:m4a)
84
+ expect(result.content_type).to eq('audio/mp4')
82
85
  end
83
86
 
84
87
  it 'parses a MOV file and provides the necessary metadata' do
@@ -23,6 +23,7 @@ describe FormatParser::MP3Parser do
23
23
 
24
24
  expect(parsed.nature).to eq(:audio)
25
25
  expect(parsed.format).to eq(:mp3)
26
+ expect(parsed.content_type).to eq('audio/mpeg')
26
27
  expect(parsed.num_audio_channels).to eq(2)
27
28
  expect(parsed.audio_sample_rate_hz).to eq(48000)
28
29
  expect(parsed.intrinsics).not_to be_nil
@@ -6,6 +6,7 @@ describe FormatParser::OggParser do
6
6
 
7
7
  expect(parse_result.nature).to eq(:audio)
8
8
  expect(parse_result.format).to eq(:ogg)
9
+ expect(parse_result.content_type).to eq('audio/ogg')
9
10
  expect(parse_result.num_audio_channels).to eq(1)
10
11
  expect(parse_result.audio_sample_rate_hz).to eq(16000)
11
12
  expect(parse_result.media_duration_seconds).to be_within(0.01).of(2973.95)
@@ -17,6 +17,7 @@ describe FormatParser::PDFParser do
17
17
  expect(parsed_pdf).not_to be_nil
18
18
  expect(parsed_pdf.nature).to eq(:document)
19
19
  expect(parsed_pdf.format).to eq(:pdf)
20
+ expect(parsed_pdf.content_type).to eq('application/pdf')
20
21
  end
21
22
  end
22
23
 
@@ -15,6 +15,7 @@ describe FormatParser::PNGParser do
15
15
 
16
16
  expect(parsed.height_px).to be_kind_of(Integer)
17
17
  expect(parsed.height_px).to be > 0
18
+ expect(parsed.content_type).to eq('image/png')
18
19
  end
19
20
  end
20
21
  end
@@ -15,6 +15,7 @@ describe FormatParser::PSDParser do
15
15
 
16
16
  expect(parsed.height_px).to be_kind_of(Integer)
17
17
  expect(parsed.height_px).to be > 0
18
+ expect(parsed.content_type).to eq('application/x-photoshop')
18
19
  end
19
20
  end
20
21
  end
@@ -59,6 +59,7 @@ describe FormatParser::TIFFParser do
59
59
  expect(parsed.width_px).to eq(7952)
60
60
  expect(parsed.height_px).to eq(5304)
61
61
  expect(parsed.intrinsics[:exif]).not_to be_nil
62
+ expect(parsed.content_type).to eq('image/x-sony-arw')
62
63
  end
63
64
 
64
65
  describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
@@ -9,6 +9,7 @@ describe FormatParser::WAVParser do
9
9
 
10
10
  expect(parse_result.nature).to eq(:audio)
11
11
  expect(parse_result.format).to eq(:wav)
12
+ expect(parse_result.content_type).to eq('audio/x-wav')
12
13
  end
13
14
  end
14
15
 
@@ -14,6 +14,7 @@ describe FormatParser::ZIPParser do
14
14
  expect(result).not_to be_nil
15
15
 
16
16
  expect(result.format).to eq(:zip)
17
+ expect(result.content_type).to eq('application/zip')
17
18
  expect(result.nature).to eq(:archive)
18
19
  expect(result.entries.length).to eq(0xFFFF + 1)
19
20
 
@@ -58,6 +59,7 @@ describe FormatParser::ZIPParser do
58
59
  result = subject.call(fi_io)
59
60
  expect(result.nature).to eq(:document)
60
61
  expect(result.format).to eq(:docx)
62
+ expect(result.content_type).to eq('application/vnd.openxmlformats-officedocument.wordprocessingml.document')
61
63
 
62
64
  fixture_path = fixtures_dir + '/ZIP/sample-docx.docx'
63
65
  fi_io = File.open(fixture_path, 'rb')
@@ -15,18 +15,31 @@ describe 'Fetching data from HTTP remotes' do
15
15
  }
16
16
  @server = WEBrick::HTTPServer.new(options)
17
17
  @server.mount '/', WEBrick::HTTPServlet::FileHandler, fixtures_dir
18
+ @server.mount_proc '/redirect' do |req, res|
19
+ res.status = 302
20
+ res.header['Location'] = req.path.sub('/redirect', '')
21
+ end
22
+ @server.mount_proc '/empty' do |_req, res|
23
+ res.status = 200
24
+ res.body = ''
25
+ end
26
+ @server.mount_proc '/tiny' do |_req, res|
27
+ res.status = 200
28
+ res.body = File.read(fixtures_dir + '/test.gif')
29
+ end
30
+
18
31
  trap('INT') { @server.stop }
19
32
  @server_thread = Thread.new { @server.start }
20
33
  end
21
34
 
22
- it '#parse_http is called without any option' do
35
+ it 'works with .parse_http called without any options' do
23
36
  result = FormatParser.parse_http('http://localhost:9399/PNG/anim.png')
24
37
 
25
38
  expect(result.format).to eq(:png)
26
39
  expect(result.height_px).to eq(180)
27
40
  end
28
41
 
29
- it '#parse_http is called with hash options' do
42
+ it 'works with .parse_http called with additional options' do
30
43
  fake_result = double(nature: :audio, format: :aiff)
31
44
  expect_any_instance_of(FormatParser::AIFFParser).to receive(:call).and_return(fake_result)
32
45
  results = FormatParser.parse_http('http://localhost:9399/PNG/anim.png', results: :all)
@@ -35,6 +48,18 @@ describe 'Fetching data from HTTP remotes' do
35
48
  expect(results).to include(fake_result)
36
49
  end
37
50
 
51
+ it 'is able to cope with a 0-size resource which does not provide Content-Range' do
52
+ file_information = FormatParser.parse_http('http://localhost:9399/empty')
53
+
54
+ expect(file_information).to be_nil
55
+ end
56
+
57
+ it 'is able to cope with a tiny resource which fits into the first requested range completely' do
58
+ file_information = FormatParser.parse_http('http://localhost:9399/tiny')
59
+ expect(file_information).not_to be_nil
60
+ expect(file_information.nature).to eq(:image)
61
+ end
62
+
38
63
  it 'parses the animated PNG over HTTP' do
39
64
  file_information = FormatParser.parse_http('http://localhost:9399/PNG/anim.png')
40
65
  expect(file_information).not_to be_nil
@@ -91,6 +116,32 @@ describe 'Fetching data from HTTP remotes' do
91
116
  end
92
117
  end
93
118
 
119
+ context 'when the server responds with a redirect' do
120
+ it 'follows the redirect' do
121
+ file_information = FormatParser.parse_http('http://localhost:9399/redirect/TIFF/test.tif')
122
+ expect(file_information.format).to eq(:tif)
123
+ end
124
+ end
125
+
126
+ it 'sends provided HTTP headers in the request' do
127
+ # Faraday is required only after calling .parse_http
128
+ # This line is just to trigger this require, then it's possible to
129
+ # add an expectation of how Faraday is initialized after.
130
+ FormatParser.parse_http('invalid_url') rescue nil
131
+
132
+ expect(Faraday)
133
+ .to receive(:new)
134
+ .with(headers: {'test-header' => 'test-value'})
135
+ .and_call_original
136
+
137
+ file_information = FormatParser.parse_http(
138
+ 'http://localhost:9399//TIFF/test.tif',
139
+ headers: {'test-header' => 'test-value'}
140
+ )
141
+
142
+ expect(file_information.format).to eq(:tif)
143
+ end
144
+
94
145
  after(:all) do
95
146
  @server.stop
96
147
  @server_thread.join(0.5)
@@ -7,7 +7,9 @@ describe FormatParser::RemoteIO do
7
7
  rio = described_class.new('https://images.invalid/img.jpg')
8
8
 
9
9
  fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 206, body: 'This is the response')
10
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109').and_return(fake_resp)
10
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
11
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
12
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
11
13
 
12
14
  rio.seek(10)
13
15
  read_result = rio.read(100)
@@ -18,7 +20,9 @@ describe FormatParser::RemoteIO do
18
20
  rio = described_class.new('https://images.invalid/img.jpg')
19
21
 
20
22
  fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 200, body: 'This is the response')
21
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109').and_return(fake_resp)
23
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
24
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
25
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
22
26
 
23
27
  rio.seek(10)
24
28
  read_result = rio.read(100)
@@ -29,7 +33,9 @@ describe FormatParser::RemoteIO do
29
33
  rio = described_class.new('https://images.invalid/img.jpg')
30
34
 
31
35
  fake_resp = double(headers: {}, status: 403, body: 'Please log in')
32
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
36
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
37
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
38
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
33
39
 
34
40
  rio.seek(100)
35
41
  expect { rio.read(100) }.to raise_error(/replied with a 403 and refused/)
@@ -39,7 +45,9 @@ describe FormatParser::RemoteIO do
39
45
  rio = described_class.new('https://images.invalid/img.jpg')
40
46
 
41
47
  fake_resp = double(headers: {}, status: 416, body: 'You stepped off the ledge of the range')
42
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
48
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
49
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
50
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
43
51
 
44
52
  rio.seek(100)
45
53
  expect(rio.read(100)).to be_nil
@@ -49,7 +57,9 @@ describe FormatParser::RemoteIO do
49
57
  rio = described_class.new('https://images.invalid/img.jpg')
50
58
 
51
59
  fake_resp = double(headers: {}, status: 403, body: 'Please log in')
52
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
60
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
61
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
62
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
53
63
 
54
64
  rio.seek(100)
55
65
  # rubocop: disable Lint/AmbiguousBlockAssociation
@@ -60,7 +70,9 @@ describe FormatParser::RemoteIO do
60
70
  rio = described_class.new('https://images.invalid/img.jpg')
61
71
 
62
72
  fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
63
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
73
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
74
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
75
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
64
76
 
65
77
  rio.seek(100)
66
78
  expect(rio.read(100)).to be_nil
@@ -69,15 +81,24 @@ describe FormatParser::RemoteIO do
69
81
  it 'does not overwrite size when the range cannot be satisfied and the response is 416' do
70
82
  rio = described_class.new('https://images.invalid/img.jpg')
71
83
 
72
- fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
73
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0').and_return(fake_resp)
84
+ fake_resp1 = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
85
+ fake_resp2 = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
86
+
87
+ faraday_conn = instance_double(Faraday::Connection)
88
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
89
+ expect(faraday_conn).to receive(:get)
90
+ .with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
91
+ .ordered
92
+ .and_return(fake_resp1)
93
+ expect(faraday_conn).to receive(:get)
94
+ .with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
95
+ .ordered
96
+ .and_return(fake_resp2)
97
+
74
98
  rio.read(1)
75
99
 
76
100
  expect(rio.size).to eq(13)
77
101
 
78
- fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
79
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
80
-
81
102
  rio.seek(100)
82
103
  expect(rio.read(100)).to be_nil
83
104
 
@@ -88,7 +109,9 @@ describe FormatParser::RemoteIO do
88
109
  rio = described_class.new('https://images.invalid/img.jpg')
89
110
 
90
111
  fake_resp = double(headers: {}, status: 502, body: 'Guru meditation')
91
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
112
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
113
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
114
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
92
115
 
93
116
  rio.seek(100)
94
117
  expect { rio.read(100) }.to raise_error(/replied with a 502 and we might want to retry/)
@@ -100,7 +123,9 @@ describe FormatParser::RemoteIO do
100
123
  expect(rio.pos).to eq(0)
101
124
 
102
125
  fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
103
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0').and_return(fake_resp)
126
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
127
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
128
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
104
129
  rio.read(1)
105
130
 
106
131
  expect(rio.pos).to eq(1)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.26.0
4
+ version: 0.29.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2021-01-08 00:00:00.000000000 Z
12
+ date: 2021-09-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -73,6 +73,20 @@ dependencies:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0.13'
76
+ - !ruby/object:Gem::Dependency
77
+ name: faraday_middleware
78
+ requirement: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.14'
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.14'
76
90
  - !ruby/object:Gem::Dependency
77
91
  name: measurometer
78
92
  requirement: !ruby/object:Gem::Requirement
@@ -183,10 +197,10 @@ executables:
183
197
  extensions: []
184
198
  extra_rdoc_files: []
185
199
  files:
200
+ - ".github/workflows/main.yml"
186
201
  - ".gitignore"
187
202
  - ".rspec"
188
203
  - ".rubocop.yml"
189
- - ".travis.yml"
190
204
  - CHANGELOG.md
191
205
  - CODE_OF_CONDUCT.md
192
206
  - CONTRIBUTING.md
data/.travis.yml DELETED
@@ -1,12 +0,0 @@
1
- rvm:
2
- - 2.2.10
3
- - 2.3.8
4
- - 2.4.9
5
- - 2.5.8
6
- - 2.6.6
7
- - 2.7.2
8
- - jruby
9
- sudo: false
10
- cache: bundler
11
- script:
12
- - bundle exec rake