format_parser 1.6.0 → 2.0.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,52 +27,43 @@ class FormatParser::ZIPParser::FileReader
27
27
  # To prevent too many tiny reads, read the maximum possible size of end of
28
28
  # central directory record upfront (all the fixed fields + at most 0xFFFF
29
29
  # bytes of the archive comment)
30
- MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE =
31
- begin
32
- 4 + # Offset of the start of central directory
33
- 4 + # Size of the central directory
34
- 2 + # Number of files in the cdir
35
- 4 + # End-of-central-directory signature
36
- 2 + # Number of this disk
37
- 2 + # Number of disk with the start of cdir
38
- 2 + # Number of files in the cdir of this disk
39
- 2 + # The comment size
40
- 0xFFFF # Maximum comment size
41
- end
30
+ MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE = 4 + # Offset of the start of central directory
31
+ 4 + # Size of the central directory
32
+ 2 + # Number of files in the cdir
33
+ 4 + # End-of-central-directory signature
34
+ 2 + # Number of this disk
35
+ 2 + # Number of disk with the start of cdir
36
+ 2 + # Number of files in the cdir of this disk
37
+ 2 + # The comment size
38
+ 0xFFFF # Maximum comment size
42
39
 
43
40
  # To prevent too many tiny reads, read the maximum possible size of the local file header upfront.
44
41
  # The maximum size is all the usual items, plus the maximum size
45
42
  # of the filename (0xFFFF bytes) and the maximum size of the extras (0xFFFF bytes)
46
- MAX_LOCAL_HEADER_SIZE =
47
- begin
48
- 4 + # signature
49
- 2 + # Version needed to extract
50
- 2 + # gp flags
51
- 2 + # storage mode
52
- 2 + # dos time
53
- 2 + # dos date
54
- 4 + # CRC32
55
- 4 + # Comp size
56
- 4 + # Uncomp size
57
- 2 + # Filename size
58
- 2 + # Extra fields size
59
- 0xFFFF + # Maximum filename size
60
- 0xFFFF # Maximum extra fields size
61
- end
62
-
63
- SIZE_OF_USABLE_EOCD_RECORD =
64
- begin
65
- 4 + # Signature
66
- 2 + # Number of this disk
67
- 2 + # Number of the disk with the EOCD record
68
- 2 + # Number of entries in the central directory of this disk
69
- 2 + # Number of entries in the central directory total
70
- 4 + # Size of the central directory
71
- 4 # Start of the central directory offset
72
- end
43
+ MAX_LOCAL_HEADER_SIZE = 4 + # signature
44
+ 2 + # Version needed to extract
45
+ 2 + # gp flags
46
+ 2 + # storage mode
47
+ 2 + # dos time
48
+ 2 + # dos date
49
+ 4 + # CRC32
50
+ 4 + # Comp size
51
+ 4 + # Uncomp size
52
+ 2 + # Filename size
53
+ 2 + # Extra fields size
54
+ 0xFFFF + # Maximum filename size
55
+ 0xFFFF # Maximum extra fields size
56
+
57
+ SIZE_OF_USABLE_EOCD_RECORD = 4 + # Signature
58
+ 2 + # Number of this disk
59
+ 2 + # Number of the disk with the EOCD record
60
+ 2 + # Number of entries in the central directory of this disk
61
+ 2 + # Number of entries in the central directory total
62
+ 4 + # Size of the central directory
63
+ 4 # Start of the central directory offset
73
64
 
74
65
  private_constant :C_UINT32LE, :C_UINT16LE, :C_UINT64LE, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
75
- :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
66
+ :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
76
67
 
77
68
  # Represents a file within the ZIP archive being read
78
69
  class ZipEntry
@@ -216,7 +207,7 @@ class FormatParser::ZIPParser::FileReader
216
207
  io.seek(absolute_pos)
217
208
  unless absolute_pos == io.pos
218
209
  raise ReadError,
219
- "Expected to seek to #{absolute_pos} but only got to #{io.pos}"
210
+ "Expected to seek to #{absolute_pos} but only got to #{io.pos}"
220
211
  end
221
212
  nil
222
213
  end
@@ -235,18 +226,14 @@ class FormatParser::ZIPParser::FileReader
235
226
  io.seek(io.pos + n)
236
227
  pos_after = io.pos
237
228
  delta = pos_after - pos_before
238
- unless delta == n
239
- raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{delta} bytes ahead"
240
- end
229
+ raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{delta} bytes ahead" unless delta == n
241
230
  nil
242
231
  end
243
232
 
244
233
  def read_n(io, n_bytes)
245
234
  io.read(n_bytes).tap do |d|
246
235
  raise ReadError, "Expected to read #{n_bytes} bytes, but the IO was at the end" if d.nil?
247
- unless d.bytesize == n_bytes
248
- raise ReadError, "Expected to read #{n_bytes} bytes, read #{d.bytesize}"
249
- end
236
+ raise ReadError, "Expected to read #{n_bytes} bytes, read #{d.bytesize}" unless d.bytesize == n_bytes
250
237
  end
251
238
  end
252
239
 
@@ -310,15 +297,9 @@ class FormatParser::ZIPParser::FileReader
310
297
  #
311
298
  # It means that before we read this stuff we need to check if the previously-read
312
299
  # values are at overflow, and only _then_ proceed to read them. Bah.
313
- if e.uncompressed_size == 0xFFFFFFFF
314
- e.uncompressed_size = read_8b(zip64_extra)
315
- end
316
- if e.compressed_size == 0xFFFFFFFF
317
- e.compressed_size = read_8b(zip64_extra)
318
- end
319
- if e.local_file_header_offset == 0xFFFFFFFF
320
- e.local_file_header_offset = read_8b(zip64_extra)
321
- end
300
+ e.uncompressed_size = read_8b(zip64_extra) if e.uncompressed_size == 0xFFFFFFFF
301
+ e.compressed_size = read_8b(zip64_extra) if e.compressed_size == 0xFFFFFFFF
302
+ e.local_file_header_offset = read_8b(zip64_extra) if e.local_file_header_offset == 0xFFFFFFFF
322
303
  # Disk number comes last and we can skip it anyway, since we do
323
304
  # not support multi-disk archives
324
305
  end
@@ -370,9 +351,7 @@ class FormatParser::ZIPParser::FileReader
370
351
  signature, *_rest, comment_size = maybe_record.unpack(unpack_pattern)
371
352
 
372
353
  # Check the only condition for the match
373
- if signature == 0x06054b50 && (maybe_record.bytesize - minimum_record_size) == comment_size
374
- return check_at # Found the EOCD marker location
375
- end
354
+ return check_at if signature == 0x06054b50 && (maybe_record.bytesize - minimum_record_size) == comment_size
376
355
  end
377
356
  # If we haven't caught anything, return nil deliberately instead of returning the last statement
378
357
  nil
@@ -422,16 +401,12 @@ class FormatParser::ZIPParser::FileReader
422
401
 
423
402
  disk_n = read_4b(zip64_eocdr) # number of this disk
424
403
  disk_n_with_eocdr = read_4b(zip64_eocdr) # number of the disk with the EOCDR
425
- if disk_n != disk_n_with_eocdr
426
- raise UnsupportedFeature, 'The archive spans multiple disks'
427
- end
404
+ raise UnsupportedFeature, 'The archive spans multiple disks' if disk_n != disk_n_with_eocdr
428
405
 
429
406
  num_files_this_disk = read_8b(zip64_eocdr) # number of files on this disk
430
- num_files_total = read_8b(zip64_eocdr) # files total in the central directory
407
+ num_files_total = read_8b(zip64_eocdr) # files total in the central directory
431
408
 
432
- if num_files_this_disk != num_files_total
433
- raise UnsupportedFeature, 'The archive spans multiple disks'
434
- end
409
+ raise UnsupportedFeature, 'The archive spans multiple disks' if num_files_this_disk != num_files_total
435
410
 
436
411
  log do
437
412
  format(
@@ -439,8 +414,8 @@ class FormatParser::ZIPParser::FileReader
439
414
  num_files_total)
440
415
  end
441
416
 
442
- central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
443
- central_dir_offset = read_8b(zip64_eocdr) # Where the central directory starts
417
+ central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
418
+ central_dir_offset = read_8b(zip64_eocdr) # Where the central directory starts
444
419
 
445
420
  [num_files_total, central_dir_offset, central_dir_size]
446
421
  end
@@ -456,8 +431,8 @@ class FormatParser::ZIPParser::FileReader
456
431
  skip_ahead_2(io) # number_of_this_disk
457
432
  skip_ahead_2(io) # number of the disk with the EOCD record
458
433
  skip_ahead_2(io) # number of entries in the central directory of this disk
459
- num_files = read_2b(io) # number of entries in the central directory total
460
- cdir_size = read_4b(io) # size of the central directory
434
+ num_files = read_2b(io) # number of entries in the central directory total
435
+ cdir_size = read_4b(io) # size of the central directory
461
436
  cdir_offset = read_4b(io) # start of central directory offset
462
437
  [num_files, cdir_offset, cdir_size]
463
438
  end
@@ -34,7 +34,7 @@ class FormatParser::ZIPParser
34
34
  end
35
35
  rescue FileReader::Error
36
36
  # This is not a ZIP, or a broken ZIP.
37
- return
37
+ nil
38
38
  end
39
39
 
40
40
  def directory?(zip_entry)
data/lib/read_limiter.rb CHANGED
@@ -45,9 +45,7 @@ class FormatParser::ReadLimiter
45
45
  # @return Integer
46
46
  def seek(to)
47
47
  @seeks += 1
48
- if @max_seeks && @seeks > @max_seeks
49
- raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks
50
- end
48
+ raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks if @max_seeks && @seeks > @max_seeks
51
49
  @io.seek(to)
52
50
  end
53
51
 
@@ -60,26 +58,20 @@ class FormatParser::ReadLimiter
60
58
  @bytes += n_bytes
61
59
  @reads += 1
62
60
 
63
- if @max_bytes && @bytes > @max_bytes
64
- raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes
65
- end
66
-
67
- if @max_reads && @reads > @max_reads
68
- raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads
69
- end
61
+ raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes if @max_bytes && @bytes > @max_bytes
62
+ raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads if @max_reads && @reads > @max_reads
70
63
 
71
64
  @io.read(n_bytes)
72
65
  end
73
66
 
74
67
  # Sends the metrics about the state of this ReadLimiter to a Measurometer
75
68
  #
76
- # @param prefix[String] the prefix to set. For example, with prefix "TIFF" the metrics will be called
77
- # `format_parser.TIFF.read_limiter.num_seeks` and so forth
69
+ # @param parser[String] the parser to add as a tag.
78
70
  # @return void
79
- def send_metrics(prefix)
80
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_seeks' % prefix, @seeks)
81
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_reads' % prefix, @reads)
82
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.read_bytes' % prefix, @bytes)
71
+ def send_metrics(parser)
72
+ Measurometer.add_distribution_value('format_parser.read_limiter.num_seeks', @seeks, parser: parser)
73
+ Measurometer.add_distribution_value('format_parser.read_limiter.num_reads', @reads, parser: parser)
74
+ Measurometer.add_distribution_value('format_parser.read_limiter.read_bytes', @bytes, parser: parser)
83
75
  end
84
76
 
85
77
  # Resets all the recorded call counters so that the object can be reused for the next parser,
data/lib/remote_io.rb CHANGED
@@ -1,14 +1,14 @@
1
+ require 'net/http'
2
+
1
3
  # Acts as a wrapper for turning a given URL into an IO object
2
- # you can read from and seek in. Uses Faraday under the hood
3
- # to perform fetches, so if you apply Faraday configuration
4
- # tweaks using `Faraday.default_connection = ...` these will
5
- # take effect for these RemoteIO objects as well
4
+ # you can read from and seek in.
6
5
  class FormatParser::RemoteIO
7
6
  class UpstreamError < StandardError
8
7
  # @return Integer
9
8
  attr_reader :status_code
9
+
10
10
  def initialize(status_code, message)
11
- @status_code = status_code
11
+ @status_code = Integer(status_code)
12
12
  super(message)
13
13
  end
14
14
  end
@@ -23,13 +23,19 @@ class FormatParser::RemoteIO
23
23
  class InvalidRequest < UpstreamError
24
24
  end
25
25
 
26
- # @param uri[URI, String] the remote URL to obtain
26
+ # Represents a failure where the maximum number of
27
+ # redirects is exceeded.
28
+ class RedirectLimitReached < UpstreamError
29
+ def initialize(uri)
30
+ super(504, "Too many redirects; last one to: #{uri}")
31
+ end
32
+ end
33
+
34
+ # @param uri[String, URI::Generic] the remote URL to obtain
27
35
  # @param headers[Hash] (optional) the HTTP headers to be used in the HTTP request
28
36
  def initialize(uri, headers: {})
29
- require 'faraday'
30
- require 'faraday_middleware/response/follow_redirects'
31
37
  @headers = headers
32
- @uri = uri
38
+ @uri = URI(uri)
33
39
  @pos = 0
34
40
  @remote_size = false
35
41
  end
@@ -63,7 +69,7 @@ class FormatParser::RemoteIO
63
69
  # @return [String] the read bytes
64
70
  def read(n_bytes)
65
71
  http_range = (@pos..(@pos + n_bytes - 1))
66
- maybe_size, maybe_body = Measurometer.instrument('format_parser.RemoteIO.read') { request_range(http_range) }
72
+ maybe_size, maybe_body = Measurometer.instrument('format_parser.remote_io.read') { request_range(http_range) }
67
73
  if maybe_size && maybe_body
68
74
  @remote_size = maybe_size
69
75
  @pos += maybe_body.bytesize
@@ -73,23 +79,39 @@ class FormatParser::RemoteIO
73
79
 
74
80
  protected
75
81
 
82
+ REDIRECT_LIMIT = 3
83
+ UNSAFE_URI_CHARS = %r{[^\-_.!~*'()a-zA-Z\d;/?:@&=+$,\[\]%]}
84
+
85
+ # Generate the URI to fetch from following a redirect response.
86
+ #
87
+ # @param location[String] The new URI reference, as provided by the Location header of the previous response.
88
+ # @param previous_uri[URI] The URI used in the previous request.
89
+ def redirect_uri(location, previous_uri)
90
+ # Escape unsafe characters in location. Use location as new URI if absolute, otherwise use it to replace the path of
91
+ # the previous URI.
92
+ new_uri = previous_uri.merge(location.to_s.gsub(UNSAFE_URI_CHARS) do |unsafe_char|
93
+ "%#{unsafe_char.unpack('H2' * unsafe_char.bytesize).join('%').upcase}"
94
+ end)
95
+ # Keep previous URI's fragment if not present in location (https://www.rfc-editor.org/rfc/rfc9110.html#section-10.2.2-5)
96
+ new_uri.fragment = previous_uri.fragment unless new_uri.fragment
97
+ new_uri
98
+ end
99
+
76
100
  # Only used internally when reading the remote file
77
101
  #
78
- # @param range[Range] the HTTP range of data to fetch from remote
79
- # @return [String] the response body of the ranged request
80
- def request_range(range)
102
+ # @param range[Range] The HTTP range of data to fetch from remote
103
+ # @param uri[URI] The URI to fetch from
104
+ # @param redirects[Integer] The amount of remaining permitted redirects
105
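+ # @return [Array(Integer, String), nil] The size of the remote resource and the response body of the ranged request, or nil if the requested range could not be satisfied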
106
+ def request_range(range, uri = @uri, redirects = REDIRECT_LIMIT)
81
107
  # We use a GET and not a HEAD request followed by a GET because
82
108
  # S3 does not allow HEAD requests if you only presigned your URL for GETs, so we
83
109
  # combine the first GET of a segment and retrieving the size of the resource
84
- conn = Faraday.new(headers: @headers) do |faraday|
85
- faraday.use FaradayMiddleware::FollowRedirects
86
- # we still need the default adapter, more details: https://blog.thecodewhisperer.com/permalink/losing-time-to-faraday
87
- faraday.adapter Faraday.default_adapter
110
+ response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
111
+ http.request_get(uri, @headers.merge({ 'range' => 'bytes=%d-%d' % [range.begin, range.end] }))
88
112
  end
89
- response = conn.get(@uri, nil, range: 'bytes=%d-%d' % [range.begin, range.end])
90
-
91
- case response.status
92
- when 200
113
+ case response
114
+ when Net::HTTPOK
93
115
  # S3 returns 200 when you request a Range that is fully satisfied by the entire object,
94
116
  # we take that into account here. Also, for very tiny responses (and also for empty responses)
95
117
  # the responses are going to be 200 which does not mean we cannot proceed
@@ -100,16 +122,16 @@ class FormatParser::RemoteIO
100
122
  error_message = [
101
123
  "We requested #{requested_range_size} bytes, but the server sent us more",
102
124
  "(#{response_size} bytes) - it likely has no `Range:` support.",
103
- "The error occurred when talking to #{@uri})"
125
+ "The error occurred when talking to #{uri})"
104
126
  ]
105
- raise InvalidRequest.new(response.status, error_message.join("\n"))
127
+ raise InvalidRequest.new(response.code, error_message.join("\n"))
106
128
  end
107
129
  [response_size, response.body]
108
- when 206
130
+ when Net::HTTPPartialContent
109
131
  # Figure out if the server supports content ranges; if it doesn't, we have no
110
132
  # business working with that server
111
- range_header = response.headers['Content-Range']
112
- raise InvalidRequest.new(response.status, "The server replied with 206 status but no Content-Range at #{@uri}") unless range_header
133
+ range_header = response['Content-Range']
134
+ raise InvalidRequest.new(response.code, "The server replied with 206 status but no Content-Range at #{uri}") unless range_header
113
135
 
114
136
  # "Content-Range: bytes 0-0/307404381" is how the response header is structured
115
137
  size = range_header[/\/(\d+)$/, 1].to_i
@@ -117,19 +139,27 @@ class FormatParser::RemoteIO
117
139
  # If we request a _larger_ range than what can be satisfied by the server,
118
140
  # the response is going to only contain what _can_ be sent and the status is also going
119
141
  # to be 206
120
- return [size, response.body]
121
- when 416
142
+ [size, response.body]
143
+ when Net::HTTPMovedPermanently, Net::HTTPFound, Net::HTTPSeeOther, Net::HTTPTemporaryRedirect, Net::HTTPPermanentRedirect
144
+ raise RedirectLimitReached.new(uri) if redirects == 0
145
+ location = response['location']
146
+ if location
147
+ request_range(range, redirect_uri(location, uri), redirects - 1)
148
+ else
149
+ raise InvalidRequest.new(response.code, "Server at #{uri} replied with a #{response.code}, indicating redirection; however, the location header was empty.")
150
+ end
151
+ when Net::HTTPRangeNotSatisfiable
122
152
  # We return `nil` if we tried to read past the end of the IO,
123
153
  # which satisfies the Ruby IO convention. The caller should deal with `nil` being the result of a read()
124
154
  # S3 will also handily _not_ supply us with the Content-Range of the actual resource, so we
125
155
  # cannot hint size with this response - at least not when working with S3
126
- return
127
- when 500..599
128
- Measurometer.increment_counter('format_parser.RemoteIO.upstream50x_errors', 1)
129
- raise IntermittentFailure.new(response.status, "Server at #{@uri} replied with a #{response.status} and we might want to retry")
156
+ nil
157
+ when Net::HTTPServerError
158
+ Measurometer.increment_counter('format_parser.remote_io.upstream50x_errors', 1)
159
+ raise IntermittentFailure.new(response.code, "Server at #{uri} replied with a #{response.code} and we might want to retry")
130
160
  else
131
- Measurometer.increment_counter('format_parser.RemoteIO.invalid_request_errors', 1)
132
- raise InvalidRequest.new(response.status, "Server at #{@uri} replied with a #{response.status} and refused our request")
161
+ Measurometer.increment_counter('format_parser.remote_io.invalid_request_errors', 1)
162
+ raise InvalidRequest.new(response.code, "Server at #{uri} replied with a #{response.code} and refused our request")
133
163
  end
134
164
  end
135
165
  end
data/lib/string.rb ADDED
@@ -0,0 +1,9 @@
1
+ class String
2
+ def underscore
3
+ gsub(/::/, '/').
4
+ gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
5
+ gsub(/([a-z\d])([A-Z])/, '\1_\2').
6
+ tr('-', '_').
7
+ downcase
8
+ end
9
+ end
@@ -106,9 +106,6 @@ describe FormatParser::AttributesJSON do
106
106
  struct: Struct.new(:key).new('Value'),
107
107
  content: "\x01\xFF\xFEb\x00i\x00r\x00d\x00s\x00 \x005\x00 \x00m\x00o\x00r\x00e\x00 \x00c\x00o\x00m\x00p\x00".b
108
108
  }
109
- expect {
110
- JSON.pretty_generate(nasty_hash) # Should not raise an error
111
- }.to raise_error(Encoding::UndefinedConversionError)
112
109
 
113
110
  anon_class = Struct.new(:evil)
114
111
  anon_class.include FormatParser::AttributesJSON
@@ -0,0 +1,119 @@
1
+ require 'spec_helper'
2
+
3
+ describe FormatParser::ARWParser do
4
+ shared_examples 'likely_match for file' do |filename_with_extension|
5
+ it "matches '#{filename_with_extension}'" do
6
+ expect(subject.likely_match?(filename_with_extension)).to be_truthy
7
+ end
8
+ end
9
+
10
+ shared_examples 'no likely_match for file' do |filename_with_extension|
11
+ it "does not match '#{filename_with_extension}'" do
12
+ expect(subject.likely_match?(filename_with_extension)).to be_falsey
13
+ end
14
+ end
15
+
16
+ describe 'likely_match' do
17
+ filenames = ['raw_file', 'another raw file', 'and.another', 'one-more']
18
+ valid_extensions = ['.arw', '.Arw', '.aRw', '.arW', '.ARw', '.ArW', '.aRW', '.ARW']
19
+ invalid_extensions = ['.tiff', '.cr2', '.new', '.jpeg']
20
+ filenames.each do |filename|
21
+ valid_extensions.each do |extension|
22
+ include_examples 'likely_match for file', filename + extension
23
+ end
24
+ invalid_extensions.each do |extension|
25
+ include_examples 'no likely_match for file', filename + extension
26
+ end
27
+ end
28
+ end
29
+
30
+ describe 'parses Sony ARW fixtures as arw format file' do
31
+ expected_parsed_dimensions = {
32
+ 'RAW_SONY_A100.ARW' => {
33
+ width_px: 3872,
34
+ height_px: 2592,
35
+ display_width_px: 3872,
36
+ display_height_px: 2592,
37
+ orientation: :top_left
38
+ },
39
+ 'RAW_SONY_A700.ARW' => {
40
+ width_px: 4288,
41
+ height_px: 2856,
42
+ display_width_px: 4288,
43
+ display_height_px: 2856,
44
+ orientation: :top_left
45
+ },
46
+ 'RAW_SONY_A900.ARW' => {
47
+ width_px: 6080,
48
+ height_px: 4048,
49
+ display_width_px: 6080,
50
+ display_height_px: 4048,
51
+ orientation: :top_left
52
+ },
53
+ # rotated 90 degree image
54
+ 'RAW_SONY_DSC-RX100M2.ARW' => {
55
+ width_px: 5472,
56
+ height_px: 3648,
57
+ display_width_px: 3648,
58
+ display_height_px: 5472,
59
+ orientation: :right_top,
60
+ },
61
+ 'RAW_SONY_ILCE-7RM2.ARW' => {
62
+ width_px: 7952,
63
+ height_px: 5304,
64
+ display_width_px: 7952,
65
+ display_height_px: 5304,
66
+ orientation: :top_left,
67
+ },
68
+ 'RAW_SONY_NEX7.ARW' => {
69
+ width_px: 6000,
70
+ height_px: 4000,
71
+ display_width_px: 6000,
72
+ display_height_px: 4000,
73
+ orientation: :top_left,
74
+ },
75
+ 'RAW_SONY_SLTA55V.ARW' => {
76
+ width_px: 4928,
77
+ height_px: 3280,
78
+ display_width_px: 4928,
79
+ display_height_px: 3280,
80
+ orientation: :top_left,
81
+ },
82
+ }
83
+
84
+ Dir.glob(fixtures_dir + '/ARW/*.ARW').each do |arw_path|
85
+ it "is able to parse #{File.basename(arw_path)}" do
86
+ expected_dimension = expected_parsed_dimensions[File.basename(arw_path)]
87
+ # error if a new .arw test file is added without specifying the expected dimensions
88
+ expect(expected_dimension).not_to be_nil
89
+
90
+ parsed = subject.call(File.open(arw_path, 'rb'))
91
+ expect(parsed).not_to be_nil
92
+ expect(parsed.nature).to eq(:image)
93
+ expect(parsed.format).to eq(:arw)
94
+ expect(parsed.intrinsics[:exif]).not_to be_nil
95
+ expect(parsed.content_type).to eq('image/x-sony-arw')
96
+
97
+ expect(parsed.width_px).to eq(expected_dimension[:width_px])
98
+ expect(parsed.height_px).to eq(expected_dimension[:height_px])
99
+ expect(parsed.display_width_px).to eq(expected_dimension[:display_width_px])
100
+ expect(parsed.display_height_px).to eq(expected_dimension[:display_height_px])
101
+ expect(parsed.orientation).to eq(expected_dimension[:orientation])
102
+ end
103
+ end
104
+
105
+ shared_examples 'invalid filetype' do |filetype, fixture_path|
106
+ it "should fail to parse #{filetype}" do
107
+ file_path = fixtures_dir + fixture_path
108
+ parsed = subject.call(File.open(file_path, 'rb'))
109
+ expect(parsed).to be_nil
110
+ end
111
+ end
112
+
113
+ include_examples 'invalid filetype', 'NEF', '/NEF/RAW_NIKON_1S2.NEF'
114
+ include_examples 'invalid filetype', 'TIFF', '/TIFF/Shinbutsureijoushuincho.tiff'
115
+ include_examples 'invalid filetype', 'JPG', '/JPEG/orient_6.jpg'
116
+ include_examples 'invalid filetype', 'PNG', '/PNG/cat.png'
117
+ include_examples 'invalid filetype', 'CR2', '/CR2/RAW_CANON_1DM2.CR2'
118
+ end
119
+ end
@@ -47,21 +47,6 @@ describe FormatParser::TIFFParser do
47
47
  expect(parsed.intrinsics[:exif]).not_to be_nil
48
48
  end
49
49
 
50
- it 'parses Sony ARW fixture as arw format file' do
51
- arw_path = fixtures_dir + '/ARW/RAW_SONY_ILCE-7RM2.ARW'
52
-
53
- parsed = subject.call(File.open(arw_path, 'rb'))
54
-
55
- expect(parsed).not_to be_nil
56
- expect(parsed.nature).to eq(:image)
57
- expect(parsed.format).to eq(:arw)
58
-
59
- expect(parsed.width_px).to eq(7952)
60
- expect(parsed.height_px).to eq(5304)
61
- expect(parsed.intrinsics[:exif]).not_to be_nil
62
- expect(parsed.content_type).to eq('image/x-sony-arw')
63
- end
64
-
65
50
  describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
66
51
  Dir.glob(fixtures_dir + '/TIFF/IMG_9266*.tif').each do |tiff_path|
67
52
  it "is able to parse #{File.basename(tiff_path)}" do
@@ -100,4 +85,13 @@ describe FormatParser::TIFFParser do
100
85
  end
101
86
  end
102
87
  end
88
+
89
+ describe 'bails out on ARW files, such as' do
90
+ Dir.glob(fixtures_dir + '/ARW/*.ARW').each do |arw_path|
91
+ it "skips #{File.basename(arw_path)}" do
92
+ parsed = subject.call(File.open(arw_path, 'rb'))
93
+ expect(parsed).to be_nil
94
+ end
95
+ end
96
+ end
103
97
  end
@@ -124,14 +124,9 @@ describe 'Fetching data from HTTP remotes' do
124
124
  end
125
125
 
126
126
  it 'sends provided HTTP headers in the request' do
127
- # Faraday is required only after calling .parse_http
128
- # This line is just to trigger this require, then it's possible to
129
- # add an expectation of how Faraday is initialized after.
130
- FormatParser.parse_http('invalid_url') rescue nil
131
-
132
- expect(Faraday)
133
- .to receive(:new)
134
- .with(headers: {'test-header' => 'test-value'})
127
+ expect_any_instance_of(Net::HTTP)
128
+ .to receive(:request_get)
129
+ .with(anything, a_hash_including('test-header' => 'test-value'))
135
130
  .and_call_original
136
131
 
137
132
  file_information = FormatParser.parse_http(