format_parser 0.26.0 → 0.29.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +104 -0
  3. data/CHANGELOG.md +12 -0
  4. data/format_parser.gemspec +1 -0
  5. data/lib/archive.rb +3 -0
  6. data/lib/audio.rb +3 -0
  7. data/lib/document.rb +1 -0
  8. data/lib/format_parser/version.rb +1 -1
  9. data/lib/format_parser.rb +3 -2
  10. data/lib/image.rb +3 -0
  11. data/lib/parsers/aiff_parser.rb +4 -1
  12. data/lib/parsers/bmp_parser.rb +3 -0
  13. data/lib/parsers/cr2_parser.rb +2 -0
  14. data/lib/parsers/dpx_parser.rb +6 -0
  15. data/lib/parsers/flac_parser.rb +2 -0
  16. data/lib/parsers/gif_parser.rb +2 -0
  17. data/lib/parsers/jpeg_parser.rb +2 -0
  18. data/lib/parsers/m3u_parser.rb +3 -1
  19. data/lib/parsers/moov_parser.rb +10 -1
  20. data/lib/parsers/mp3_parser.rb +3 -2
  21. data/lib/parsers/ogg_parser.rb +3 -2
  22. data/lib/parsers/pdf_parser.rb +2 -2
  23. data/lib/parsers/png_parser.rb +2 -0
  24. data/lib/parsers/psd_parser.rb +2 -0
  25. data/lib/parsers/tiff_parser.rb +10 -2
  26. data/lib/parsers/wav_parser.rb +3 -0
  27. data/lib/parsers/zip_parser/office_formats.rb +5 -5
  28. data/lib/parsers/zip_parser.rb +5 -3
  29. data/lib/remote_io.rb +29 -7
  30. data/lib/text.rb +1 -0
  31. data/lib/video.rb +3 -0
  32. data/spec/parsers/aiff_parser_spec.rb +1 -0
  33. data/spec/parsers/bmp_parser_spec.rb +8 -0
  34. data/spec/parsers/cr2_parser_spec.rb +1 -0
  35. data/spec/parsers/dpx_parser_spec.rb +1 -0
  36. data/spec/parsers/flac_parser_spec.rb +1 -0
  37. data/spec/parsers/gif_parser_spec.rb +1 -0
  38. data/spec/parsers/jpeg_parser_spec.rb +1 -0
  39. data/spec/parsers/m3u_parser_spec.rb +1 -0
  40. data/spec/parsers/moov_parser_spec.rb +4 -1
  41. data/spec/parsers/mp3_parser_spec.rb +1 -0
  42. data/spec/parsers/ogg_parser_spec.rb +1 -0
  43. data/spec/parsers/pdf_parser_spec.rb +1 -0
  44. data/spec/parsers/png_parser_spec.rb +1 -0
  45. data/spec/parsers/psd_parser_spec.rb +1 -0
  46. data/spec/parsers/tiff_parser_spec.rb +1 -0
  47. data/spec/parsers/wav_parser_spec.rb +1 -0
  48. data/spec/parsers/zip_parser_spec.rb +2 -0
  49. data/spec/remote_fetching_spec.rb +53 -2
  50. data/spec/remote_io_spec.rb +38 -13
  51. metadata +17 -3
  52. data/.travis.yml +0 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1074a8172a2830a11df0fb7874936c2b8abab8d74fd39985f9c9f7b72d5b348c
4
- data.tar.gz: 5eafd2d610cd30bc85a056bd1c31331bcb49e8ce6b5b538cd13e280b138be7db
3
+ metadata.gz: 75ee83f55840e3031d4d60d8dc07ca038812188613e2b740079e1c965efb2886
4
+ data.tar.gz: 31c3ee84434560c18e6ea74a23160b909e6f880f52b2ed6f0e888e847c557bd9
5
5
  SHA512:
6
- metadata.gz: 5ce396a71fedd82b8041bcb6c833e559c7eef74886e73095eaf0b3d21e0c0d49b1620a83aba9796a8134dd5a7fc679156cd967cf82cba95b5941941be73d70c4
7
- data.tar.gz: '0395e5a8fb35e860060e9c3b040b788aaad97eb5883f2d662b418156f1f3986bc4a26a814b9a8552ce7dcbd271da27e977cfb77a5e5b155ebd35db6a49a97719'
6
+ metadata.gz: 536cfb1bac7926f56ba760959d7c5a0905d3b2b0944b16248b6c81be00b722dad894a8d6d5773134fb0471e42d016a20be15e3d5a01c671f0cc65658f2fc05b4
7
+ data.tar.gz: cb3f73df051b8612cb6d0e1a4e55c045e461bf2c5e4667dd6e461e779b1f39d01be8d70d084e252e5198a966ac590129286811f5822808a0c980c2ad72a087a1
@@ -0,0 +1,104 @@
1
+ name: CI
2
+
3
+ on: [push,pull_request]
4
+
5
+ env:
6
+ BUNDLE_PATH: vendor/bundle
7
+
8
+ jobs:
9
+ lint:
10
+ name: Code Style
11
+ runs-on: ubuntu-18.04
12
+ if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
13
+ strategy:
14
+ matrix:
15
+ ruby:
16
+ - 2.7
17
+ - 2.6
18
+ - 2.5
19
+ - 2.4
20
+ - 2.3
21
+ - 2.2
22
+ - jruby
23
+ steps:
24
+ - name: Checkout
25
+ uses: actions/checkout@v2
26
+ - name: Setup Ruby
27
+ uses: ruby/setup-ruby@v1
28
+ with:
29
+ ruby-version: ${{ matrix.ruby }}
30
+ - name: Gemfile Cache
31
+ uses: actions/cache@v2
32
+ with:
33
+ path: Gemfile.lock
34
+ key: ${{ runner.os }}-gemlock-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'format_parser.gemspec') }}
35
+ restore-keys: |
36
+ ${{ runner.os }}-gemlock-${{ matrix.ruby }}-
37
+ - name: Bundle Cache
38
+ id: cache-gems
39
+ uses: actions/cache@v2
40
+ with:
41
+ path: vendor/bundle
42
+ key: ${{ runner.os }}-gems-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'Gemfile.lock', 'format_parser.gemspec') }}
43
+ restore-keys: |
44
+ ${{ runner.os }}-gems-${{ matrix.ruby }}-
45
+ ${{ runner.os }}-gems-
46
+ - name: Bundle Install
47
+ if: steps.cache-gems.outputs.cache-hit != 'true'
48
+ run: bundle install --jobs 4 --retry 3
49
+ - name: Rubocop Cache
50
+ uses: actions/cache@v2
51
+ with:
52
+ path: ~/.cache/rubocop_cache
53
+ key: ${{ runner.os }}-rubocop-${{ hashFiles('.rubocop.yml') }}
54
+ restore-keys: |
55
+ ${{ runner.os }}-rubocop-
56
+ - name: Rubocop
57
+ run: bundle exec rubocop
58
+ test:
59
+ name: Specs
60
+ runs-on: ubuntu-18.04
61
+ if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
62
+ strategy:
63
+ matrix:
64
+ ruby:
65
+ - 2.7
66
+ - 2.6
67
+ - 2.5
68
+ - 2.4
69
+ - 2.3
70
+ - 2.2
71
+ - jruby
72
+ experimental: [false]
73
+ include:
74
+ - ruby: 3.0
75
+ experimental: true
76
+ steps:
77
+ - name: Checkout
78
+ uses: actions/checkout@v2
79
+ - name: Setup Ruby
80
+ uses: ruby/setup-ruby@v1
81
+ with:
82
+ ruby-version: ${{ matrix.ruby }}
83
+ - name: Gemfile Cache
84
+ uses: actions/cache@v2
85
+ with:
86
+ path: Gemfile.lock
87
+ key: ${{ runner.os }}-gemlock-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'format_parser.gemspec') }}
88
+ restore-keys: |
89
+ ${{ runner.os }}-gemlock-${{ matrix.ruby }}-
90
+ - name: Bundle Cache
91
+ id: cache-gems
92
+ uses: actions/cache@v2
93
+ with:
94
+ path: vendor/bundle
95
+ key: ${{ runner.os }}-gems-${{ matrix.ruby }}-${{ hashFiles('Gemfile', 'Gemfile.lock', 'format_parser.gemspec') }}
96
+ restore-keys: |
97
+ ${{ runner.os }}-gems-${{ matrix.ruby }}-
98
+ ${{ runner.os }}-gems-
99
+ - name: Bundle Install
100
+ if: steps.cache-gems.outputs.cache-hit != 'true'
101
+ run: bundle install --jobs 4 --retry 3
102
+ - name: RSpec
103
+ continue-on-error: ${{ matrix.experimental }}
104
+ run: bundle exec rake parallel:spec
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## 0.29.1
2
+ * Fix handling of 200 responses with `parse_http` as well as handling of very small responses which do not need range access
3
+
4
+ ## 0.29.0
5
+ * Add option `headers:` to `FormatParser.parse_http`
6
+
7
+ ## 0.28.0
8
+ * Change `FormatParser.parse_http` to follow HTTP redirects
9
+
10
+ ## 0.27.0
11
+ * Add `#content_type` on `Result` return values which makes sense for the detected filetype
12
+
1
13
  ## 0.26.0
2
14
  * Add support for M3U format files
3
15
 
@@ -34,6 +34,7 @@ Gem::Specification.new do |spec|
34
34
  spec.add_dependency 'exifr', '~> 1', '>= 1.3.8'
35
35
  spec.add_dependency 'id3tag', '~> 0.14'
36
36
  spec.add_dependency 'faraday', '~> 0.13'
37
+ spec.add_dependency 'faraday_middleware', '~> 0.14'
37
38
  spec.add_dependency 'measurometer', '~> 1'
38
39
 
39
40
  spec.add_development_dependency 'rspec', '~> 3.0'
data/lib/archive.rb CHANGED
@@ -26,6 +26,9 @@ module FormatParser
26
26
  # it can be placed here
27
27
  attr_accessor :intrinsics
28
28
 
29
+ # The MIME type of the archive
30
+ attr_accessor :content_type
31
+
29
32
  # Only permits assignments via defined accessors
30
33
  def initialize(**attributes)
31
34
  attributes.map { |(k, v)| public_send("#{k}=", v) }
data/lib/audio.rb CHANGED
@@ -35,6 +35,9 @@ module FormatParser
35
35
  # it can be placed here
36
36
  attr_accessor :intrinsics
37
37
 
38
+ # The MIME type of the sound file
39
+ attr_accessor :content_type
40
+
38
41
  # Only permits assignments via defined accessors
39
42
  def initialize(**attributes)
40
43
  attributes.map { |(k, v)| public_send("#{k}=", v) }
data/lib/document.rb CHANGED
@@ -7,6 +7,7 @@ module FormatParser
7
7
  attr_accessor :format
8
8
  attr_accessor :document_type
9
9
  attr_accessor :page_count
10
+ attr_accessor :content_type
10
11
 
11
12
  # Only permits assignments via defined accessors
12
13
  def initialize(**attributes)
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.26.0'
2
+ VERSION = '0.29.1'
3
3
  end
data/lib/format_parser.rb CHANGED
@@ -88,13 +88,14 @@ module FormatParser
88
88
  # given to `.parse`. The accepted keyword arguments are the same as the ones for `parse`.
89
89
  #
90
90
  # @param url[String, URI] the HTTP(S) URL to request the object from using Faraday and `Range:` requests
91
+ # @param headers[Hash] (optional) the HTTP headers to request the object from using Faraday
91
92
  # @param kwargs the keyword arguments to be delegated to `.parse`
92
93
  # @see {.parse}
93
- def self.parse_http(url, **kwargs)
94
+ def self.parse_http(url, headers: {}, **kwargs)
94
95
  # Do not extract the filename, since the URL
95
96
  # can really be "anything". But if the caller
96
97
  # provides filename_hint it will be carried over
97
- parse(RemoteIO.new(url), **kwargs)
98
+ parse(RemoteIO.new(url, headers: headers), **kwargs)
98
99
  end
99
100
 
100
101
  # Parses the file at the given `path` and returns the results as if it were any IO
data/lib/image.rb CHANGED
@@ -64,6 +64,9 @@ module FormatParser
64
64
  # it can be placed here
65
65
  attr_accessor :intrinsics
66
66
 
67
+ # The MIME type of the image file
68
+ attr_accessor :content_type
69
+
67
70
  # Only permits assignments via defined accessors
68
71
  def initialize(**attributes)
69
72
  attributes.map { |(k, v)| public_send("#{k}=", v) }
@@ -1,6 +1,8 @@
1
1
  class FormatParser::AIFFParser
2
2
  include FormatParser::IOUtils
3
3
 
4
+ AIFF_MIME_TYPE = 'audio/x-aiff'
5
+
4
6
  # Known chunk types we can omit when parsing,
5
7
  # grossly lifted from http://www.muratnkonar.com/aiff/
6
8
  KNOWN_CHUNKS = [
@@ -70,7 +72,8 @@ class FormatParser::AIFFParser
70
72
  num_audio_channels: channels,
71
73
  audio_sample_rate_hz: sample_rate.to_i,
72
74
  media_duration_frames: sample_frames,
73
- media_duration_seconds: duration_in_seconds
75
+ media_duration_seconds: duration_in_seconds,
76
+ content_type: AIFF_MIME_TYPE,
74
77
  )
75
78
  end
76
79
 
@@ -5,6 +5,7 @@ class FormatParser::BMPParser
5
5
 
6
6
  VALID_BMP = 'BM'
7
7
  PERMISSIBLE_PIXEL_ARRAY_LOCATIONS = 26..512
8
+ BMP_MIME_TYPE = 'image/bmp'
8
9
 
9
10
  def likely_match?(filename)
10
11
  filename =~ /\.bmp$/i
@@ -42,6 +43,7 @@ class FormatParser::BMPParser
42
43
  width_px: width,
43
44
  height_px: height,
44
45
  color_mode: :rgb,
46
+ content_type: BMP_MIME_TYPE,
45
47
  intrinsics: {
46
48
  data_order: data_order,
47
49
  bits_per_pixel: bit_depth
@@ -63,6 +65,7 @@ class FormatParser::BMPParser
63
65
  width_px: width,
64
66
  height_px: height.abs,
65
67
  color_mode: :rgb,
68
+ content_type: BMP_MIME_TYPE,
66
69
  intrinsics: {
67
70
  vertical_resolution: vertical_res,
68
71
  horizontal_resolution: horizontal_res,
@@ -6,6 +6,7 @@ class FormatParser::CR2Parser
6
6
 
7
7
  TIFF_HEADER = [0x49, 0x49, 0x2a, 0x00]
8
8
  CR2_HEADER = [0x43, 0x52, 0x02, 0x00]
9
+ CR2_MIME_TYPE = 'image/x-canon-cr2'
9
10
 
10
11
  def likely_match?(filename)
11
12
  filename =~ /\.cr2$/i
@@ -39,6 +40,7 @@ class FormatParser::CR2Parser
39
40
  display_height_px: exif_data.rotated? ? w : h,
40
41
  orientation: exif_data.orientation_sym,
41
42
  intrinsics: {exif: exif_data},
43
+ content_type: CR2_MIME_TYPE,
42
44
  )
43
45
  rescue EXIFR::MalformedTIFF
44
46
  nil
@@ -6,6 +6,11 @@ class FormatParser::DPXParser
6
6
  BE_MAGIC = 'SDPX'
7
7
  LE_MAGIC = BE_MAGIC.reverse
8
8
 
9
+ # There is no official MIME type for DPX, so we have
10
+ # to invent something useful. We will prefix it with x-
11
+ # to indicate that it is an unregistered (non-standard) subtype
12
+ DPX_MIME_TYPE = 'image/x-dpx'
13
+
9
14
  class ByteOrderHintIO < SimpleDelegator
10
15
  def initialize(io, is_little_endian)
11
16
  super(io)
@@ -61,6 +66,7 @@ class FormatParser::DPXParser
61
66
  display_width_px: display_w,
62
67
  display_height_px: display_h,
63
68
  intrinsics: dpx_structure,
69
+ content_type: DPX_MIME_TYPE,
64
70
  )
65
71
  end
66
72
 
@@ -4,6 +4,7 @@ class FormatParser::FLACParser
4
4
  MAGIC_BYTES = 4
5
5
  MAGIC_BYTE_STRING = 'fLaC'
6
6
  BLOCK_HEADER_BYTES = 4
7
+ FLAC_MIME_TYPE = 'audio/x-flac'
7
8
 
8
9
  def likely_match?(filename)
9
10
  filename =~ /\.flac$/i
@@ -61,6 +62,7 @@ class FormatParser::FLACParser
61
62
  audio_sample_rate_hz: sample_rate,
62
63
  media_duration_seconds: duration,
63
64
  media_duration_frames: total_samples,
65
+ content_type: FLAC_MIME_TYPE,
64
66
  intrinsics: {
65
67
  bits_per_sample: bits_per_sample,
66
68
  minimum_frame_size: minimum_frame_size,
@@ -3,6 +3,7 @@ class FormatParser::GIFParser
3
3
 
4
4
  HEADERS = ['GIF87a', 'GIF89a'].map(&:b)
5
5
  NETSCAPE_AND_AUTHENTICATION_CODE = 'NETSCAPE2.0'
6
+ GIF_MIME_TYPE = 'image/gif'
6
7
 
7
8
  def likely_match?(filename)
8
9
  filename =~ /\.gif$/i
@@ -45,6 +46,7 @@ class FormatParser::GIFParser
45
46
  height_px: h,
46
47
  has_multiple_frames: is_animated,
47
48
  color_mode: :indexed,
49
+ content_type: GIF_MIME_TYPE
48
50
  )
49
51
  end
50
52
 
@@ -12,6 +12,7 @@ class FormatParser::JPEGParser
12
12
  APP1_MARKER = 0xE1 # maybe EXIF
13
13
  EXIF_MAGIC_STRING = "Exif\0\0".b
14
14
  MUST_FIND_NEXT_MARKER_WITHIN_BYTES = 1024
15
+ JPEG_MIME_TYPE = 'image/jpeg'
15
16
 
16
17
  def self.likely_match?(filename)
17
18
  filename =~ /\.jpe?g$/i
@@ -88,6 +89,7 @@ class FormatParser::JPEGParser
88
89
  display_height_px: dh,
89
90
  orientation: flat_exif.orientation_sym,
90
91
  intrinsics: {exif: flat_exif},
92
+ content_type: JPEG_MIME_TYPE
91
93
  )
92
94
 
93
95
  return result
@@ -2,6 +2,7 @@ class FormatParser::M3UParser
2
2
  include FormatParser::IOUtils
3
3
 
4
4
  HEADER = '#EXTM3U'
5
+ M3U8_MIME_TYPE = 'application/vnd.apple.mpegurl' # https://en.wikipedia.org/wiki/M3U#Internet_media_types
5
6
 
6
7
  def likely_match?(filename)
7
8
  filename =~ /\.m3u8?$/i
@@ -14,7 +15,8 @@ class FormatParser::M3UParser
14
15
  return unless HEADER.eql?(header)
15
16
 
16
17
  FormatParser::Text.new(
17
- format: :m3u
18
+ format: :m3u,
19
+ content_type: M3U8_MIME_TYPE,
18
20
  )
19
21
  end
20
22
  FormatParser.register_parser new, natures: :text, formats: :m3u
@@ -11,6 +11,12 @@ class FormatParser::MOOVParser
11
11
  'm4a ' => :m4a,
12
12
  }
13
13
 
14
+ # https://tools.ietf.org/html/rfc4337#section-2
15
+ # There is also video/quicktime which we should be able to capture
16
+ # here, but there is currently no detection for MOVs versus MP4s
17
+ MP4_AU_MIME_TYPE = 'audio/mp4'
18
+ MP4_MIXED_MIME_TYPE = 'video/mp4'
19
+
14
20
  def likely_match?(filename)
15
21
  filename =~ /\.(mov|m4a|ma4|mp4|aac|m4v)$/i
16
22
  end
@@ -49,10 +55,12 @@ class FormatParser::MOOVParser
49
55
  end
50
56
 
51
57
  # M4A only contains audio, while MP4 and friends can contain video.
52
- if format_from_moov_type(file_type) == :m4a
58
+ fmt = format_from_moov_type(file_type)
59
+ if fmt == :m4a
53
60
  FormatParser::Audio.new(
54
61
  format: format_from_moov_type(file_type),
55
62
  media_duration_seconds: media_duration_s,
63
+ content_type: MP4_AU_MIME_TYPE,
56
64
  intrinsics: atom_tree,
57
65
  )
58
66
  else
@@ -61,6 +69,7 @@ class FormatParser::MOOVParser
61
69
  width_px: width,
62
70
  height_px: height,
63
71
  media_duration_seconds: media_duration_s,
72
+ content_type: MP4_MIXED_MIME_TYPE,
64
73
  intrinsics: atom_tree,
65
74
  )
66
75
  end
@@ -32,7 +32,7 @@ class FormatParser::MP3Parser
32
32
  MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
33
33
  MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
34
34
  TIFF_HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
35
-
35
+ MP3_MIME_TYPE = 'audio/mpeg'
36
36
  # Wraps the Tag object returned by ID3Tag in such
37
37
  # a way that a usable JSON representation gets
38
38
  # returned
@@ -104,7 +104,8 @@ class FormatParser::MP3Parser
104
104
  # do not tell anything of substance
105
105
  num_audio_channels: first_frame.channels,
106
106
  audio_sample_rate_hz: first_frame.sample_rate,
107
- intrinsics: id3tags_hash.merge(id3tags: tags)
107
+ intrinsics: id3tags_hash.merge(id3tags: tags),
108
+ content_type: MP3_MIME_TYPE,
108
109
  )
109
110
 
110
111
  extra_file_attirbutes = fetch_extra_attributes_from_id3_tags(id3tags_hash)
@@ -3,8 +3,8 @@
3
3
  class FormatParser::OggParser
4
4
  include FormatParser::IOUtils
5
5
 
6
- # Maximum size of an Ogg page
7
6
  MAX_POSSIBLE_PAGE_SIZE = 65307
7
+ OGG_MIME_TYPE = 'audio/ogg'
8
8
 
9
9
  def likely_match?(filename)
10
10
  filename =~ /\.ogg$/i
@@ -45,7 +45,8 @@ class FormatParser::OggParser
45
45
  format: :ogg,
46
46
  audio_sample_rate_hz: sample_rate,
47
47
  num_audio_channels: channels,
48
- media_duration_seconds: duration
48
+ media_duration_seconds: duration,
49
+ content_type: OGG_MIME_TYPE,
49
50
  )
50
51
  end
51
52
 
@@ -1,6 +1,5 @@
1
1
  class FormatParser::PDFParser
2
2
  include FormatParser::IOUtils
3
-
4
3
  # First 9 bytes of a PDF should be in this format, according to:
5
4
  #
6
5
  # https://stackoverflow.com/questions/3108201/detect-if-pdf-file-is-correct-header-pdf
@@ -8,6 +7,7 @@ class FormatParser::PDFParser
8
7
  # There are however exceptions, which are left out for now.
9
8
  #
10
9
  PDF_MARKER = /%PDF-1\.[0-8]{1}/
10
+ PDF_CONTENT_TYPE = 'application/pdf'
11
11
 
12
12
  def likely_match?(filename)
13
13
  filename =~ /\.(pdf|ai)$/i
@@ -18,7 +18,7 @@ class FormatParser::PDFParser
18
18
 
19
19
  return unless safe_read(io, 9) =~ PDF_MARKER
20
20
 
21
- FormatParser::Document.new(format: :pdf)
21
+ FormatParser::Document.new(format: :pdf, content_type: PDF_CONTENT_TYPE)
22
22
  end
23
23
 
24
24
  FormatParser.register_parser new, natures: :document, formats: :pdf, priority: 1
@@ -14,6 +14,7 @@ class FormatParser::PNGParser
14
14
  4 => true, # Grayscale with alpha
15
15
  6 => true,
16
16
  }
17
+ PNG_MIME_TYPE = 'image/png'
17
18
 
18
19
  def likely_match?(filename)
19
20
  filename =~ /\.png$/i
@@ -67,6 +68,7 @@ class FormatParser::PNGParser
67
68
  color_mode: color_mode,
68
69
  has_multiple_frames: has_animation,
69
70
  num_animation_or_video_frames: num_frames,
71
+ content_type: PNG_MIME_TYPE,
70
72
  )
71
73
  end
72
74
 
@@ -2,6 +2,7 @@ class FormatParser::PSDParser
2
2
  include FormatParser::IOUtils
3
3
 
4
4
  PSD_HEADER = [0x38, 0x42, 0x50, 0x53]
5
+ PSD_MIME_TYPE = 'application/x-photoshop'
5
6
 
6
7
  def likely_match?(filename)
7
8
  filename =~ /\.psd$/i # Maybe also PSB at some point
@@ -20,6 +21,7 @@ class FormatParser::PSDParser
20
21
  format: :psd,
21
22
  width_px: w,
22
23
  height_px: h,
24
+ content_type: PSD_MIME_TYPE,
23
25
  )
24
26
  end
25
27
 
@@ -5,6 +5,8 @@ class FormatParser::TIFFParser
5
5
  MAGIC_LE = [0x49, 0x49, 0x2A, 0x0].pack('C4')
6
6
  MAGIC_BE = [0x4D, 0x4D, 0x0, 0x2A].pack('C4')
7
7
  HEADER_BYTES = [MAGIC_LE, MAGIC_BE]
8
+ TIFF_MIME_TYPE = 'image/tiff'
9
+ ARW_MIME_TYPE = 'image/x-sony-arw'
8
10
 
9
11
  def likely_match?(filename)
10
12
  filename =~ /\.tiff?$/i
@@ -14,7 +16,10 @@ class FormatParser::TIFFParser
14
16
  io = FormatParser::IOConstraint.new(io)
15
17
 
16
18
  return unless HEADER_BYTES.include?(safe_read(io, 4))
17
- io.seek(io.pos + 2) # Skip over the offset of the IFD, EXIFR will re-read it anyway
19
+
20
+ # Skip over the offset of the IFD,
21
+ # EXIFR will re-read it anyway
22
+ io.seek(io.pos + 2)
18
23
  return if cr2?(io)
19
24
 
20
25
  # The TIFF scanner in EXIFR is plenty good enough,
@@ -26,14 +31,17 @@ class FormatParser::TIFFParser
26
31
  w = exif_data.width || exif_data.pixel_x_dimension
27
32
  h = exif_data.height || exif_data.pixel_y_dimension
28
33
 
34
+ format = arw?(exif_data) ? :arw : :tif
35
+ mime_type = arw?(exif_data) ? ARW_MIME_TYPE : TIFF_MIME_TYPE
29
36
  FormatParser::Image.new(
30
- format: arw?(exif_data) ? :arw : :tif, # Specify format as arw for Sony ARW format images, else tif
37
+ format: format,
31
38
  width_px: w,
32
39
  height_px: h,
33
40
  display_width_px: exif_data.rotated? ? h : w,
34
41
  display_height_px: exif_data.rotated? ? w : h,
35
42
  orientation: exif_data.orientation_sym,
36
43
  intrinsics: {exif: exif_data},
44
+ content_type: mime_type,
37
45
  )
38
46
  rescue EXIFR::MalformedTIFF
39
47
  nil
@@ -1,6 +1,8 @@
1
1
  class FormatParser::WAVParser
2
2
  include FormatParser::IOUtils
3
3
 
4
+ WAV_MIME_TYPE = 'audio/x-wav'
5
+
4
6
  def likely_match?(filename)
5
7
  filename =~ /\.wav$/i
6
8
  end
@@ -96,6 +98,7 @@ class FormatParser::WAVParser
96
98
  audio_sample_rate_hz: fmt_data[:sample_rate],
97
99
  media_duration_frames: sample_frames,
98
100
  media_duration_seconds: duration_in_seconds,
101
+ content_type: WAV_MIME_TYPE,
99
102
  )
100
103
  end
101
104
 
@@ -37,15 +37,15 @@ module FormatParser::ZIPParser::OfficeFormats
37
37
  OFFICE_MARKER_FILES.subset?(filenames_set)
38
38
  end
39
39
 
40
- def office_file_format_from_entry_set(filenames_set)
40
+ def office_file_format_and_mime_type_from_entry_set(filenames_set)
41
41
  if filenames_set.include?('word/document.xml')
42
- :docx
42
+ [:docx, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document']
43
43
  elsif filenames_set.include?('xl/workbook.xml')
44
- :xlsx
44
+ [:xlsx, 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet']
45
45
  elsif filenames_set.include?('ppt/presentation.xml')
46
- :pptx
46
+ [:pptx, 'application/vnd.openxmlformats-officedocument.presentationml.presentation']
47
47
  else
48
- :unknown
48
+ [:unknown, 'application/zip']
49
49
  end
50
50
  end
51
51
  end
@@ -5,6 +5,8 @@ class FormatParser::ZIPParser
5
5
  include OfficeFormats
6
6
  include FormatParser::IOUtils
7
7
 
8
+ ZIP_MIME_TYPE = 'application/zip'
9
+
8
10
  def likely_match?(filename)
9
11
  filename =~ /\.(zip|docx|keynote|numbers|pptx|xlsx)$/i
10
12
  end
@@ -25,10 +27,10 @@ class FormatParser::ZIPParser
25
27
  end
26
28
 
27
29
  if office_document?(filenames_set)
28
- office_format = office_file_format_from_entry_set(filenames_set)
29
- FormatParser::Archive.new(nature: :document, format: office_format, entries: entries_archive)
30
+ office_format, mime_type = office_file_format_and_mime_type_from_entry_set(filenames_set)
31
+ FormatParser::Archive.new(nature: :document, format: office_format, entries: entries_archive, content_type: mime_type)
30
32
  else
31
- FormatParser::Archive.new(nature: :archive, format: :zip, entries: entries_archive)
33
+ FormatParser::Archive.new(nature: :archive, format: :zip, entries: entries_archive, content_type: ZIP_MIME_TYPE)
32
34
  end
33
35
  rescue FileReader::Error
34
36
  # This is not a ZIP, or a broken ZIP.
data/lib/remote_io.rb CHANGED
@@ -24,8 +24,11 @@ class FormatParser::RemoteIO
24
24
  end
25
25
 
26
26
  # @param uri[URI, String] the remote URL to obtain
27
- def initialize(uri)
27
+ # @param headers[Hash] (optional) the HTTP headers to be used in the HTTP request
28
+ def initialize(uri, headers: {})
28
29
  require 'faraday'
30
+ require 'faraday_middleware/response/follow_redirects'
31
+ @headers = headers
29
32
  @uri = uri
30
33
  @pos = 0
31
34
  @remote_size = false
@@ -78,21 +81,40 @@ class FormatParser::RemoteIO
78
81
  # We use a GET and not a HEAD request followed by a GET because
79
82
  # S3 does not allow HEAD requests if you only presigned your URL for GETs, so we
80
83
  # combine the first GET of a segment and retrieving the size of the resource
81
- response = Faraday.get(@uri, nil, range: 'bytes=%d-%d' % [range.begin, range.end])
84
+ conn = Faraday.new(headers: @headers) do |faraday|
85
+ faraday.use FaradayMiddleware::FollowRedirects
86
+ # we still need the default adapter, more details: https://blog.thecodewhisperer.com/permalink/losing-time-to-faraday
87
+ faraday.adapter Faraday.default_adapter
88
+ end
89
+ response = conn.get(@uri, nil, range: 'bytes=%d-%d' % [range.begin, range.end])
82
90
 
83
91
  case response.status
84
- when 200, 206
92
+ when 200
93
+ # S3 returns 200 when you request a Range that is fully satisfied by the entire object,
94
+ # we take that into account here. Also, for very tiny responses (and also for empty responses)
95
+ # the responses are going to be 200 which does not mean we cannot proceed
96
+ # To have a good check for both of these conditions we need to know whether the ranges overlap fully
97
+ response_size = response.body.bytesize
98
+ requested_range_size = range.end - range.begin + 1
99
+ if response_size > requested_range_size
100
+ error_message = [
101
+ "We requested #{requested_range_size} bytes, but the server sent us more",
102
+ "(#{response_size} bytes) - it likely has no `Range:` support.",
103
+ "The error occurred when talking to #{@uri})"
104
+ ]
105
+ raise InvalidRequest.new(response.status, error_message.join("\n"))
106
+ end
107
+ [response_size, response.body]
108
+ when 206
85
109
  # Figure out if the server supports content ranges; if it doesn't, we have no
86
110
  # business working with that server
87
111
  range_header = response.headers['Content-Range']
88
- raise InvalidRequest.new(response.status, "No range support at #{@uri}") unless range_header
112
+ raise InvalidRequest.new(response.status, "The server replied with 206 status but no Content-Range at #{@uri}") unless range_header
89
113
 
90
114
  # "Content-Range: bytes 0-0/307404381" is how the response header is structured
91
115
  size = range_header[/\/(\d+)$/, 1].to_i
92
116
 
93
- # S3 returns 200 when you request a Range that is fully satisfied by the entire object,
94
- # we take that into account here. For other servers, 206 is the expected response code.
95
- # Also, if we request a _larger_ range than what can be satisfied by the server,
117
+ # If we request a _larger_ range than what can be satisfied by the server,
96
118
  # the response is going to only contain what _can_ be sent and the status is also going
97
119
  # to be 206
98
120
  return [size, response.body]
data/lib/text.rb CHANGED
@@ -5,6 +5,7 @@ module FormatParser
5
5
  NATURE = :text
6
6
 
7
7
  attr_accessor :format
8
+ attr_accessor :content_type
8
9
 
9
10
  # Only permits assignments via defined accessors
10
11
  def initialize(**attributes)
data/lib/video.rb CHANGED
@@ -23,6 +23,9 @@ module FormatParser
23
23
  # it can be placed here
24
24
  attr_accessor :intrinsics
25
25
 
26
+ # The MIME type of the video
27
+ attr_accessor :content_type
28
+
26
29
  # Only permits assignments via defined accessors
27
30
  def initialize(**attributes)
28
31
  attributes.map { |(k, v)| public_send("#{k}=", v) }
@@ -10,6 +10,7 @@ describe FormatParser::AIFFParser do
10
10
  expect(parse_result.num_audio_channels).to eq(2)
11
11
  expect(parse_result.audio_sample_rate_hz).to be_within(0.01).of(44100)
12
12
  expect(parse_result.media_duration_seconds).to be_within(0.01).of(1.05)
13
+ expect(parse_result.content_type).to eq('audio/x-aiff')
13
14
  end
14
15
 
15
16
  it 'parses a Logic Pro created AIFF sample file having a COMT chunk before a COMM chunk' do
@@ -13,6 +13,8 @@ describe FormatParser::BMPParser do
13
13
  expect(parsed.width_px).to eq(40)
14
14
  expect(parsed.height_px).to eq(27)
15
15
 
16
+ expect(parsed.content_type).to eq('image/bmp')
17
+
16
18
  expect(parsed.intrinsics).not_to be_nil
17
19
  expect(parsed.intrinsics[:vertical_resolution]).to eq(2834)
18
20
  expect(parsed.intrinsics[:horizontal_resolution]).to eq(2834)
@@ -32,6 +34,8 @@ describe FormatParser::BMPParser do
32
34
  expect(parsed.width_px).to eq(1920)
33
35
  expect(parsed.height_px).to eq(1080)
34
36
 
37
+ expect(parsed.content_type).to eq('image/bmp')
38
+
35
39
  expect(parsed.intrinsics).not_to be_nil
36
40
  expect(parsed.intrinsics[:vertical_resolution]).to eq(2835)
37
41
  expect(parsed.intrinsics[:horizontal_resolution]).to eq(2835)
@@ -51,6 +55,8 @@ describe FormatParser::BMPParser do
51
55
  expect(parsed.width_px).to eq(200)
52
56
  expect(parsed.height_px).to eq(200)
53
57
 
58
+ expect(parsed.content_type).to eq('image/bmp')
59
+
54
60
  expect(parsed.intrinsics).not_to be_nil
55
61
  end
56
62
 
@@ -64,6 +70,7 @@ describe FormatParser::BMPParser do
64
70
  expect(parsed.color_mode).to eq(:rgb)
65
71
  expect(parsed.width_px).to eq(40)
66
72
  expect(parsed.height_px).to eq(27)
73
+ expect(parsed.content_type).to eq('image/bmp')
67
74
  expect(parsed.intrinsics[:bits_per_pixel]).to eq(24)
68
75
  expect(parsed.intrinsics[:data_order]).to eq(:normal)
69
76
 
@@ -76,6 +83,7 @@ describe FormatParser::BMPParser do
76
83
  expect(parsed.color_mode).to eq(:rgb)
77
84
  expect(parsed.width_px).to eq(40)
78
85
  expect(parsed.height_px).to eq(27)
86
+ expect(parsed.content_type).to eq('image/bmp')
79
87
  expect(parsed.intrinsics[:bits_per_pixel]).to eq(24)
80
88
  expect(parsed.intrinsics[:data_order]).to eq(:normal)
81
89
  end
@@ -17,6 +17,7 @@ describe FormatParser::CR2Parser do
17
17
  expect(parsed.height_px).to be > 0
18
18
 
19
19
  expect(parsed.orientation).not_to be_nil
20
+ expect(parsed.content_type).to eq('image/x-canon-cr2')
20
21
  end
21
22
  end
22
23
  end
@@ -15,6 +15,7 @@ describe FormatParser::DPXParser do
15
15
  expect(parsed.width_px).to be_between(0, 2048)
16
16
  expect(parsed.height_px).to be_kind_of(Integer)
17
17
  expect(parsed.height_px).to be_between(0, 4000)
18
+ expect(parsed.content_type).to eq('image/x-dpx')
18
19
  end
19
20
  end
20
21
 
@@ -14,6 +14,7 @@ describe FormatParser::FLACParser do
14
14
  expect(parsed.intrinsics).not_to be_nil
15
15
  expect(parsed.media_duration_frames).to eq(33810)
16
16
  expect(parsed.media_duration_seconds).to be_within(0.1).of(0.836)
17
+ expect(parsed.content_type).to eq('audio/x-flac')
17
18
  end
18
19
 
19
20
  it 'decodes and estimates duration for the 16bit FLAC File' do
@@ -17,6 +17,7 @@ describe FormatParser::GIFParser do
17
17
 
18
18
  expect(parsed.height_px).to be_kind_of(Integer)
19
19
  expect(parsed.height_px).to be > 0
20
+ expect(parsed.content_type).to eq('image/gif')
20
21
  end
21
22
  end
22
23
  end
@@ -14,6 +14,7 @@ describe FormatParser::JPEGParser do
14
14
 
15
15
  expect(parsed.height_px).to be_kind_of(Integer)
16
16
  expect(parsed.height_px).to be > 0
17
+ expect(parsed.content_type).to eq('image/jpeg')
17
18
  end
18
19
  end
19
20
  end
@@ -25,6 +25,7 @@ describe FormatParser::M3UParser do
25
25
  expect(parsed_m3u).not_to be_nil
26
26
  expect(parsed_m3u.nature).to eq(:text)
27
27
  expect(parsed_m3u.format).to eq(:m3u)
28
+ expect(parsed_m3u.content_type).to eq('application/vnd.apple.mpegurl')
28
29
  end
29
30
  end
30
31
 
@@ -37,7 +37,7 @@ describe FormatParser::MOOVParser do
37
37
  expect(result.nature).to eq(:audio)
38
38
  expect(result.media_duration_seconds).to be_kind_of(Float)
39
39
  expect(result.media_duration_seconds).to be > 0
40
-
40
+ expect(result.content_type).to be_kind_of(String)
41
41
  expect(result.intrinsics).not_to be_nil
42
42
  end
43
43
  end
@@ -52,6 +52,7 @@ describe FormatParser::MOOVParser do
52
52
  expect(result.height_px).to be > 0
53
53
  expect(result.media_duration_seconds).to be_kind_of(Float)
54
54
  expect(result.media_duration_seconds).to be > 0
55
+ expect(result.content_type).to eq('video/mp4')
55
56
 
56
57
  expect(result.intrinsics).not_to be_nil
57
58
  end
@@ -67,6 +68,7 @@ describe FormatParser::MOOVParser do
67
68
  expect(result.height_px).to be > 0
68
69
  expect(result.media_duration_seconds).to be_kind_of(Float)
69
70
  expect(result.media_duration_seconds).to be > 0
71
+ expect(result.content_type).to eq('video/mp4')
70
72
 
71
73
  expect(result.intrinsics).not_to be_nil
72
74
  end
@@ -79,6 +81,7 @@ describe FormatParser::MOOVParser do
79
81
  expect(result).not_to be_nil
80
82
  expect(result.nature).to eq(:audio)
81
83
  expect(result.format).to eq(:m4a)
84
+ expect(result.content_type).to eq('audio/mp4')
82
85
  end
83
86
 
84
87
  it 'parses a MOV file and provides the necessary metadata' do
@@ -23,6 +23,7 @@ describe FormatParser::MP3Parser do
23
23
 
24
24
  expect(parsed.nature).to eq(:audio)
25
25
  expect(parsed.format).to eq(:mp3)
26
+ expect(parsed.content_type).to eq('audio/mpeg')
26
27
  expect(parsed.num_audio_channels).to eq(2)
27
28
  expect(parsed.audio_sample_rate_hz).to eq(48000)
28
29
  expect(parsed.intrinsics).not_to be_nil
@@ -6,6 +6,7 @@ describe FormatParser::OggParser do
6
6
 
7
7
  expect(parse_result.nature).to eq(:audio)
8
8
  expect(parse_result.format).to eq(:ogg)
9
+ expect(parse_result.content_type).to eq('audio/ogg')
9
10
  expect(parse_result.num_audio_channels).to eq(1)
10
11
  expect(parse_result.audio_sample_rate_hz).to eq(16000)
11
12
  expect(parse_result.media_duration_seconds).to be_within(0.01).of(2973.95)
@@ -17,6 +17,7 @@ describe FormatParser::PDFParser do
17
17
  expect(parsed_pdf).not_to be_nil
18
18
  expect(parsed_pdf.nature).to eq(:document)
19
19
  expect(parsed_pdf.format).to eq(:pdf)
20
+ expect(parsed_pdf.content_type).to eq('application/pdf')
20
21
  end
21
22
  end
22
23
 
@@ -15,6 +15,7 @@ describe FormatParser::PNGParser do
15
15
 
16
16
  expect(parsed.height_px).to be_kind_of(Integer)
17
17
  expect(parsed.height_px).to be > 0
18
+ expect(parsed.content_type).to eq('image/png')
18
19
  end
19
20
  end
20
21
  end
@@ -15,6 +15,7 @@ describe FormatParser::PSDParser do
15
15
 
16
16
  expect(parsed.height_px).to be_kind_of(Integer)
17
17
  expect(parsed.height_px).to be > 0
18
+ expect(parsed.content_type).to eq('application/x-photoshop')
18
19
  end
19
20
  end
20
21
  end
@@ -59,6 +59,7 @@ describe FormatParser::TIFFParser do
59
59
  expect(parsed.width_px).to eq(7952)
60
60
  expect(parsed.height_px).to eq(5304)
61
61
  expect(parsed.intrinsics[:exif]).not_to be_nil
62
+ expect(parsed.content_type).to eq('image/x-sony-arw')
62
63
  end
63
64
 
64
65
  describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
@@ -9,6 +9,7 @@ describe FormatParser::WAVParser do
9
9
 
10
10
  expect(parse_result.nature).to eq(:audio)
11
11
  expect(parse_result.format).to eq(:wav)
12
+ expect(parse_result.content_type).to eq('audio/x-wav')
12
13
  end
13
14
  end
14
15
 
@@ -14,6 +14,7 @@ describe FormatParser::ZIPParser do
14
14
  expect(result).not_to be_nil
15
15
 
16
16
  expect(result.format).to eq(:zip)
17
+ expect(result.content_type).to eq('application/zip')
17
18
  expect(result.nature).to eq(:archive)
18
19
  expect(result.entries.length).to eq(0xFFFF + 1)
19
20
 
@@ -58,6 +59,7 @@ describe FormatParser::ZIPParser do
58
59
  result = subject.call(fi_io)
59
60
  expect(result.nature).to eq(:document)
60
61
  expect(result.format).to eq(:docx)
62
+ expect(result.content_type).to eq('application/vnd.openxmlformats-officedocument.wordprocessingml.document')
61
63
 
62
64
  fixture_path = fixtures_dir + '/ZIP/sample-docx.docx'
63
65
  fi_io = File.open(fixture_path, 'rb')
@@ -15,18 +15,31 @@ describe 'Fetching data from HTTP remotes' do
15
15
  }
16
16
  @server = WEBrick::HTTPServer.new(options)
17
17
  @server.mount '/', WEBrick::HTTPServlet::FileHandler, fixtures_dir
18
+ @server.mount_proc '/redirect' do |req, res|
19
+ res.status = 302
20
+ res.header['Location'] = req.path.sub('/redirect', '')
21
+ end
22
+ @server.mount_proc '/empty' do |_req, res|
23
+ res.status = 200
24
+ res.body = ''
25
+ end
26
+ @server.mount_proc '/tiny' do |_req, res|
27
+ res.status = 200
28
+ res.body = File.read(fixtures_dir + '/test.gif')
29
+ end
30
+
18
31
  trap('INT') { @server.stop }
19
32
  @server_thread = Thread.new { @server.start }
20
33
  end
21
34
 
22
- it '#parse_http is called without any option' do
35
+ it 'works with .parse_http called without any options' do
23
36
  result = FormatParser.parse_http('http://localhost:9399/PNG/anim.png')
24
37
 
25
38
  expect(result.format).to eq(:png)
26
39
  expect(result.height_px).to eq(180)
27
40
  end
28
41
 
29
- it '#parse_http is called with hash options' do
42
+ it 'works with .parse_http called with additional options' do
30
43
  fake_result = double(nature: :audio, format: :aiff)
31
44
  expect_any_instance_of(FormatParser::AIFFParser).to receive(:call).and_return(fake_result)
32
45
  results = FormatParser.parse_http('http://localhost:9399/PNG/anim.png', results: :all)
@@ -35,6 +48,18 @@ describe 'Fetching data from HTTP remotes' do
35
48
  expect(results).to include(fake_result)
36
49
  end
37
50
 
51
+ it 'is able to cope with a 0-size resource which does not provide Content-Range' do
52
+ file_information = FormatParser.parse_http('http://localhost:9399/empty')
53
+
54
+ expect(file_information).to be_nil
55
+ end
56
+
57
+ it 'is able to cope with a tiny resource which fits into the first requested range completely' do
58
+ file_information = FormatParser.parse_http('http://localhost:9399/tiny')
59
+ expect(file_information).not_to be_nil
60
+ expect(file_information.nature).to eq(:image)
61
+ end
62
+
38
63
  it 'parses the animated PNG over HTTP' do
39
64
  file_information = FormatParser.parse_http('http://localhost:9399/PNG/anim.png')
40
65
  expect(file_information).not_to be_nil
@@ -91,6 +116,32 @@ describe 'Fetching data from HTTP remotes' do
91
116
  end
92
117
  end
93
118
 
119
+ context 'when the server responds with a redirect' do
120
+ it 'follows the redirect' do
121
+ file_information = FormatParser.parse_http('http://localhost:9399/redirect/TIFF/test.tif')
122
+ expect(file_information.format).to eq(:tif)
123
+ end
124
+ end
125
+
126
+ it 'sends provided HTTP headers in the request' do
127
+ # Faraday is required only after calling .parse_http
128
+ # This line is just to trigger this require, then it's possible to
129
+ # add an expectation of how Faraday is initialized after.
130
+ FormatParser.parse_http('invalid_url') rescue nil
131
+
132
+ expect(Faraday)
133
+ .to receive(:new)
134
+ .with(headers: {'test-header' => 'test-value'})
135
+ .and_call_original
136
+
137
+ file_information = FormatParser.parse_http(
138
+ 'http://localhost:9399//TIFF/test.tif',
139
+ headers: {'test-header' => 'test-value'}
140
+ )
141
+
142
+ expect(file_information.format).to eq(:tif)
143
+ end
144
+
94
145
  after(:all) do
95
146
  @server.stop
96
147
  @server_thread.join(0.5)
@@ -7,7 +7,9 @@ describe FormatParser::RemoteIO do
7
7
  rio = described_class.new('https://images.invalid/img.jpg')
8
8
 
9
9
  fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 206, body: 'This is the response')
10
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109').and_return(fake_resp)
10
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
11
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
12
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
11
13
 
12
14
  rio.seek(10)
13
15
  read_result = rio.read(100)
@@ -18,7 +20,9 @@ describe FormatParser::RemoteIO do
18
20
  rio = described_class.new('https://images.invalid/img.jpg')
19
21
 
20
22
  fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 200, body: 'This is the response')
21
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109').and_return(fake_resp)
23
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
24
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
25
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
22
26
 
23
27
  rio.seek(10)
24
28
  read_result = rio.read(100)
@@ -29,7 +33,9 @@ describe FormatParser::RemoteIO do
29
33
  rio = described_class.new('https://images.invalid/img.jpg')
30
34
 
31
35
  fake_resp = double(headers: {}, status: 403, body: 'Please log in')
32
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
36
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
37
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
38
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
33
39
 
34
40
  rio.seek(100)
35
41
  expect { rio.read(100) }.to raise_error(/replied with a 403 and refused/)
@@ -39,7 +45,9 @@ describe FormatParser::RemoteIO do
39
45
  rio = described_class.new('https://images.invalid/img.jpg')
40
46
 
41
47
  fake_resp = double(headers: {}, status: 416, body: 'You stepped off the ledge of the range')
42
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
48
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
49
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
50
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
43
51
 
44
52
  rio.seek(100)
45
53
  expect(rio.read(100)).to be_nil
@@ -49,7 +57,9 @@ describe FormatParser::RemoteIO do
49
57
  rio = described_class.new('https://images.invalid/img.jpg')
50
58
 
51
59
  fake_resp = double(headers: {}, status: 403, body: 'Please log in')
52
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
60
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
61
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
62
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
53
63
 
54
64
  rio.seek(100)
55
65
  # rubocop: disable Lint/AmbiguousBlockAssociation
@@ -60,7 +70,9 @@ describe FormatParser::RemoteIO do
60
70
  rio = described_class.new('https://images.invalid/img.jpg')
61
71
 
62
72
  fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
63
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
73
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
74
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
75
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
64
76
 
65
77
  rio.seek(100)
66
78
  expect(rio.read(100)).to be_nil
@@ -69,15 +81,24 @@ describe FormatParser::RemoteIO do
69
81
  it 'does not overwrite size when the range cannot be satisfied and the response is 416' do
70
82
  rio = described_class.new('https://images.invalid/img.jpg')
71
83
 
72
- fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
73
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0').and_return(fake_resp)
84
+ fake_resp1 = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
85
+ fake_resp2 = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
86
+
87
+ faraday_conn = instance_double(Faraday::Connection)
88
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
89
+ expect(faraday_conn).to receive(:get)
90
+ .with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
91
+ .ordered
92
+ .and_return(fake_resp1)
93
+ expect(faraday_conn).to receive(:get)
94
+ .with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
95
+ .ordered
96
+ .and_return(fake_resp2)
97
+
74
98
  rio.read(1)
75
99
 
76
100
  expect(rio.size).to eq(13)
77
101
 
78
- fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
79
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
80
-
81
102
  rio.seek(100)
82
103
  expect(rio.read(100)).to be_nil
83
104
 
@@ -88,7 +109,9 @@ describe FormatParser::RemoteIO do
88
109
  rio = described_class.new('https://images.invalid/img.jpg')
89
110
 
90
111
  fake_resp = double(headers: {}, status: 502, body: 'Guru meditation')
91
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199').and_return(fake_resp)
112
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
113
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
114
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
92
115
 
93
116
  rio.seek(100)
94
117
  expect { rio.read(100) }.to raise_error(/replied with a 502 and we might want to retry/)
@@ -100,7 +123,9 @@ describe FormatParser::RemoteIO do
100
123
  expect(rio.pos).to eq(0)
101
124
 
102
125
  fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
103
- expect(Faraday).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0').and_return(fake_resp)
126
+ faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
127
+ allow(Faraday).to receive(:new).and_return(faraday_conn)
128
+ expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
104
129
  rio.read(1)
105
130
 
106
131
  expect(rio.pos).to eq(1)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.26.0
4
+ version: 0.29.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2021-01-08 00:00:00.000000000 Z
12
+ date: 2021-09-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks
@@ -73,6 +73,20 @@ dependencies:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0.13'
76
+ - !ruby/object:Gem::Dependency
77
+ name: faraday_middleware
78
+ requirement: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.14'
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.14'
76
90
  - !ruby/object:Gem::Dependency
77
91
  name: measurometer
78
92
  requirement: !ruby/object:Gem::Requirement
@@ -183,10 +197,10 @@ executables:
183
197
  extensions: []
184
198
  extra_rdoc_files: []
185
199
  files:
200
+ - ".github/workflows/main.yml"
186
201
  - ".gitignore"
187
202
  - ".rspec"
188
203
  - ".rubocop.yml"
189
- - ".travis.yml"
190
204
  - CHANGELOG.md
191
205
  - CODE_OF_CONDUCT.md
192
206
  - CONTRIBUTING.md
data/.travis.yml DELETED
@@ -1,12 +0,0 @@
1
- rvm:
2
- - 2.2.10
3
- - 2.3.8
4
- - 2.4.9
5
- - 2.5.8
6
- - 2.6.6
7
- - 2.7.2
8
- - jruby
9
- sudo: false
10
- cache: bundler
11
- script:
12
- - bundle exec rake