format_parser 1.6.0 → 2.0.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +4 -9
- data/CHANGELOG.md +12 -0
- data/README.md +1 -1
- data/format_parser.gemspec +9 -11
- data/lib/care.rb +5 -11
- data/lib/format_parser/version.rb +1 -1
- data/lib/format_parser.rb +8 -11
- data/lib/io_utils.rb +2 -6
- data/lib/parsers/aac_parser/adts_header_info.rb +3 -9
- data/lib/parsers/arw_parser.rb +50 -0
- data/lib/parsers/dpx_parser/dpx_structs.rb +1 -1
- data/lib/parsers/exif_parser.rb +2 -4
- data/lib/parsers/fdx_parser.rb +2 -2
- data/lib/parsers/flac_parser.rb +2 -6
- data/lib/parsers/jpeg_parser.rb +2 -2
- data/lib/parsers/moov_parser.rb +5 -7
- data/lib/parsers/mp3_parser.rb +2 -6
- data/lib/parsers/mpeg_parser.rb +1 -3
- data/lib/parsers/tiff_parser.rb +5 -6
- data/lib/parsers/wav_parser.rb +9 -12
- data/lib/parsers/zip_parser/file_reader.rb +45 -70
- data/lib/parsers/zip_parser.rb +1 -1
- data/lib/read_limiter.rb +8 -16
- data/lib/remote_io.rb +64 -34
- data/lib/string.rb +9 -0
- data/spec/attributes_json_spec.rb +0 -3
- data/spec/parsers/arw_parser_spec.rb +119 -0
- data/spec/parsers/tiff_parser_spec.rb +9 -15
- data/spec/remote_fetching_spec.rb +3 -8
- data/spec/remote_io_spec.rb +116 -60
- metadata +38 -75
data/lib/parsers/zip_parser/file_reader.rb
CHANGED
@@ -27,52 +27,43 @@ class FormatParser::ZIPParser::FileReader
   # To prevent too many tiny reads, read the maximum possible size of end of
   # central directory record upfront (all the fixed fields + at most 0xFFFF
   # bytes of the archive comment)
-  MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE =
-
-
-
-
-
-
-
-
-    2 + # The comment size
-    0xFFFF # Maximum comment size
-  end
+  MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE = 4 + # Offset of the start of central directory
+    4 + # Size of the central directory
+    2 + # Number of files in the cdir
+    4 + # End-of-central-directory signature
+    2 + # Number of this disk
+    2 + # Number of disk with the start of cdir
+    2 + # Number of files in the cdir of this disk
+    2 + # The comment size
+    0xFFFF # Maximum comment size

   # To prevent too many tiny reads, read the maximum possible size of the local file header upfront.
   # The maximum size is all the usual items, plus the maximum size
   # of the filename (0xFFFF bytes) and the maximum size of the extras (0xFFFF bytes)
-  MAX_LOCAL_HEADER_SIZE =
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    2 + # Number of the disk with the EOCD record
-    2 + # Number of entries in the central directory of this disk
-    2 + # Number of entries in the central directory total
-    4 + # Size of the central directory
-    4 # Start of the central directory offset
-  end
+  MAX_LOCAL_HEADER_SIZE = 4 + # signature
+    2 + # Version needed to extract
+    2 + # gp flags
+    2 + # storage mode
+    2 + # dos time
+    2 + # dos date
+    4 + # CRC32
+    4 + # Comp size
+    4 + # Uncomp size
+    2 + # Filename size
+    2 + # Extra fields size
+    0xFFFF + # Maximum filename size
+    0xFFFF # Maximum extra fields size
+
+  SIZE_OF_USABLE_EOCD_RECORD = 4 + # Signature
+    2 + # Number of this disk
+    2 + # Number of the disk with the EOCD record
+    2 + # Number of entries in the central directory of this disk
+    2 + # Number of entries in the central directory total
+    4 + # Size of the central directory
+    4 # Start of the central directory offset

   private_constant :C_UINT32LE, :C_UINT16LE, :C_UINT64LE, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
-
+                   :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD

   # Represents a file within the ZIP archive being read
   class ZipEntry
@@ -216,7 +207,7 @@ class FormatParser::ZIPParser::FileReader
     io.seek(absolute_pos)
     unless absolute_pos == io.pos
       raise ReadError,
-
+            "Expected to seek to #{absolute_pos} but only got to #{io.pos}"
     end
     nil
   end
@@ -235,18 +226,14 @@ class FormatParser::ZIPParser::FileReader
     io.seek(io.pos + n)
     pos_after = io.pos
     delta = pos_after - pos_before
-    unless delta == n
-      raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{delta} bytes ahead"
-    end
+    raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{delta} bytes ahead" unless delta == n
     nil
   end

   def read_n(io, n_bytes)
     io.read(n_bytes).tap do |d|
       raise ReadError, "Expected to read #{n_bytes} bytes, but the IO was at the end" if d.nil?
-      unless d.bytesize == n_bytes
-        raise ReadError, "Expected to read #{n_bytes} bytes, read #{d.bytesize}"
-      end
+      raise ReadError, "Expected to read #{n_bytes} bytes, read #{d.bytesize}" unless d.bytesize == n_bytes
     end
   end

@@ -310,15 +297,9 @@ class FormatParser::ZIPParser::FileReader
     #
     # It means that before we read this stuff we need to check if the previously-read
     # values are at overflow, and only _then_ proceed to read them. Bah.
-    if e.uncompressed_size == 0xFFFFFFFF
-      e.uncompressed_size = read_8b(zip64_extra)
-    end
-    if e.compressed_size == 0xFFFFFFFF
-      e.compressed_size = read_8b(zip64_extra)
-    end
-    if e.local_file_header_offset == 0xFFFFFFFF
-      e.local_file_header_offset = read_8b(zip64_extra)
-    end
+    e.uncompressed_size = read_8b(zip64_extra) if e.uncompressed_size == 0xFFFFFFFF
+    e.compressed_size = read_8b(zip64_extra) if e.compressed_size == 0xFFFFFFFF
+    e.local_file_header_offset = read_8b(zip64_extra) if e.local_file_header_offset == 0xFFFFFFFF
     # Disk number comes last and we can skip it anyway, since we do
     # not support multi-disk archives
   end
@@ -370,9 +351,7 @@ class FormatParser::ZIPParser::FileReader
       signature, *_rest, comment_size = maybe_record.unpack(unpack_pattern)

       # Check the only condition for the match
-      if signature == 0x06054b50 && (maybe_record.bytesize - minimum_record_size) == comment_size
-        return check_at # Found the EOCD marker location
-      end
+      return check_at if signature == 0x06054b50 && (maybe_record.bytesize - minimum_record_size) == comment_size
     end
     # If we haven't caught anything, return nil deliberately instead of returning the last statement
     nil
@@ -422,16 +401,12 @@ class FormatParser::ZIPParser::FileReader

     disk_n = read_4b(zip64_eocdr) # number of this disk
     disk_n_with_eocdr = read_4b(zip64_eocdr) # number of the disk with the EOCDR
-    if disk_n != disk_n_with_eocdr
-      raise UnsupportedFeature, 'The archive spans multiple disks'
-    end
+    raise UnsupportedFeature, 'The archive spans multiple disks' if disk_n != disk_n_with_eocdr

     num_files_this_disk = read_8b(zip64_eocdr) # number of files on this disk
-    num_files_total
+    num_files_total = read_8b(zip64_eocdr) # files total in the central directory

-    if num_files_this_disk != num_files_total
-      raise UnsupportedFeature, 'The archive spans multiple disks'
-    end
+    raise UnsupportedFeature, 'The archive spans multiple disks' if num_files_this_disk != num_files_total

     log do
       format(
@@ -439,8 +414,8 @@ class FormatParser::ZIPParser::FileReader
         num_files_total)
     end

-    central_dir_size
-    central_dir_offset
+    central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
+    central_dir_offset = read_8b(zip64_eocdr) # Where the central directory starts

     [num_files_total, central_dir_offset, central_dir_size]
   end
@@ -456,8 +431,8 @@ class FormatParser::ZIPParser::FileReader
     skip_ahead_2(io) # number_of_this_disk
     skip_ahead_2(io) # number of the disk with the EOCD record
     skip_ahead_2(io) # number of entries in the central directory of this disk
-    num_files = read_2b(io)
-    cdir_size = read_4b(io)
+    num_files = read_2b(io) # number of entries in the central directory total
+    cdir_size = read_4b(io) # size of the central directory
     cdir_offset = read_4b(io) # start of central directorty offset
     [num_files, cdir_offset, cdir_size]
   end
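For reference, the reshaped constants above are plain sums; evaluated in Ruby they come out as follows (this is just the arithmetic of the component lists shown in the diff, not code from the gem):

  4 + 4 + 2 + 4 + 2 + 2 + 2 + 2 + 0xFFFF                        # MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE => 65_557
  4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4 + 2 + 2 + 0xFFFF + 0xFFFF   # MAX_LOCAL_HEADER_SIZE => 131_100
  4 + 2 + 2 + 2 + 2 + 4 + 4                                     # SIZE_OF_USABLE_EOCD_RECORD => 20

So one read of roughly 64 KiB is enough to capture any end-of-central-directory record, and roughly 128 KiB covers any local file header, which is what the "prevent too many tiny reads" comments are referring to.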
data/lib/parsers/zip_parser.rb
CHANGED
data/lib/read_limiter.rb
CHANGED
@@ -45,9 +45,7 @@ class FormatParser::ReadLimiter
   # @return Integer
   def seek(to)
     @seeks += 1
-    if @max_seeks && @seeks > @max_seeks
-      raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks
-    end
+    raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks if @max_seeks && @seeks > @max_seeks
     @io.seek(to)
   end

@@ -60,26 +58,20 @@ class FormatParser::ReadLimiter
     @bytes += n_bytes
     @reads += 1

-    if @max_bytes && @bytes > @max_bytes
-      raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes
-    end
-
-    if @max_reads && @reads > @max_reads
-      raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads
-    end
+    raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes if @max_bytes && @bytes > @max_bytes
+    raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads if @max_reads && @reads > @max_reads

     @io.read(n_bytes)
   end

   # Sends the metrics about the state of this ReadLimiter to a Measurometer
   #
-  # @param
-  # `format_parser.TIFF.read_limiter.num_seeks` and so forth
+  # @param parser[String] the parser to add as a tag.
   # @return void
-  def send_metrics(
-    Measurometer.add_distribution_value('format_parser
-    Measurometer.add_distribution_value('format_parser
-    Measurometer.add_distribution_value('format_parser
+  def send_metrics(parser)
+    Measurometer.add_distribution_value('format_parser.read_limiter.num_seeks', @seeks, parser: parser)
+    Measurometer.add_distribution_value('format_parser.read_limiter.num_reads', @reads, parser: parser)
+    Measurometer.add_distribution_value('format_parser.read_limiter.read_bytes', @bytes, parser: parser)
   end

   # Resets all the recorded call counters so that the object can be reused for the next parser,
data/lib/remote_io.rb
CHANGED
@@ -1,14 +1,14 @@
+require 'net/http'
+
 # Acts as a wrapper for turning a given URL into an IO object
-# you can read from and seek in.
-# to perform fetches, so if you apply Faraday configuration
-# tweaks using `Faraday.default_connection = ...` these will
-# take effect for these RemoteIO objects as well
+# you can read from and seek in.
 class FormatParser::RemoteIO
   class UpstreamError < StandardError
     # @return Integer
     attr_reader :status_code
+
     def initialize(status_code, message)
-      @status_code = status_code
+      @status_code = Integer(status_code)
       super(message)
     end
   end
@@ -23,13 +23,19 @@ class FormatParser::RemoteIO
   class InvalidRequest < UpstreamError
   end

-  #
+  # Represents a failure where the maximum amount of
+  # redirect requests are exceeded.
+  class RedirectLimitReached < UpstreamError
+    def initialize(uri)
+      super(504, "Too many redirects; last one to: #{uri}")
+    end
+  end
+
+  # @param uri[String, URI::Generic] the remote URL to obtain
   # @param headers[Hash] (optional) the HTTP headers to be used in the HTTP request
   def initialize(uri, headers: {})
-    require 'faraday'
-    require 'faraday_middleware/response/follow_redirects'
     @headers = headers
-    @uri = uri
+    @uri = URI(uri)
     @pos = 0
     @remote_size = false
   end
@@ -63,7 +69,7 @@ class FormatParser::RemoteIO
   # @return [String] the read bytes
   def read(n_bytes)
     http_range = (@pos..(@pos + n_bytes - 1))
-    maybe_size, maybe_body = Measurometer.instrument('format_parser.
+    maybe_size, maybe_body = Measurometer.instrument('format_parser.remote_io.read') { request_range(http_range) }
     if maybe_size && maybe_body
       @remote_size = maybe_size
       @pos += maybe_body.bytesize
@@ -73,23 +79,39 @@ class FormatParser::RemoteIO

   protected

+  REDIRECT_LIMIT = 3
+  UNSAFE_URI_CHARS = %r{[^\-_.!~*'()a-zA-Z\d;/?:@&=+$,\[\]%]}
+
+  # Generate the URI to fetch from following a redirect response.
+  #
+  # @param location[String] The new URI reference, as provided by the Location header of the previous response.
+  # @param previous_uri[URI] The URI used in the previous request.
+  def redirect_uri(location, previous_uri)
+    # Escape unsafe characters in location. Use location as new URI if absolute, otherwise use it to replace the path of
+    # the previous URI.
+    new_uri = previous_uri.merge(location.to_s.gsub(UNSAFE_URI_CHARS) do |unsafe_char|
+      "%#{unsafe_char.unpack('H2' * unsafe_char.bytesize).join('%').upcase}"
+    end)
+    # Keep previous URI's fragment if not present in location (https://www.rfc-editor.org/rfc/rfc9110.html#section-10.2.2-5)
+    new_uri.fragment = previous_uri.fragment unless new_uri.fragment
+    new_uri
+  end
+
   # Only used internally when reading the remote file
   #
-  # @param range[Range]
-  # @
-
+  # @param range[Range] The HTTP range of data to fetch from remote
+  # @param uri[URI] The URI to fetch from
+  # @param redirects[Integer] The amount of remaining permitted redirects
+  # @return [[Integer, String]] The response body of the ranged request
+  def request_range(range, uri = @uri, redirects = REDIRECT_LIMIT)
     # We use a GET and not a HEAD request followed by a GET because
     # S3 does not allow HEAD requests if you only presigned your URL for GETs, so we
     # combine the first GET of a segment and retrieving the size of the resource
-
-
-    # we still need the default adapter, more details: https://blog.thecodewhisperer.com/permalink/losing-time-to-faraday
-    faraday.adapter Faraday.default_adapter
+    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
+      http.request_get(uri, @headers.merge({ 'range' => 'bytes=%d-%d' % [range.begin, range.end] }))
     end
-    response
-
-    case response.status
-    when 200
+    case response
+    when Net::HTTPOK
       # S3 returns 200 when you request a Range that is fully satisfied by the entire object,
       # we take that into account here. Also, for very tiny responses (and also for empty responses)
       # the responses are going to be 200 which does not mean we cannot proceed
@@ -100,16 +122,16 @@ class FormatParser::RemoteIO
         error_message = [
          "We requested #{requested_range_size} bytes, but the server sent us more",
          "(#{response_size} bytes) - it likely has no `Range:` support.",
-         "The error occurred when talking to #{
+         "The error occurred when talking to #{uri})"
        ]
-        raise InvalidRequest.new(response.
+        raise InvalidRequest.new(response.code, error_message.join("\n"))
       end
       [response_size, response.body]
-    when
+    when Net::HTTPPartialContent
       # Figure out of the server supports content ranges, if it doesn't we have no
       # business working with that server
-      range_header = response
-      raise InvalidRequest.new(response.
+      range_header = response['Content-Range']
+      raise InvalidRequest.new(response.code, "The server replied with 206 status but no Content-Range at #{uri}") unless range_header

      # "Content-Range: bytes 0-0/307404381" is how the response header is structured
      size = range_header[/\/(\d+)$/, 1].to_i
@@ -117,19 +139,27 @@ class FormatParser::RemoteIO
       # If we request a _larger_ range than what can be satisfied by the server,
       # the response is going to only contain what _can_ be sent and the status is also going
       # to be 206
-
-    when
+      [size, response.body]
+    when Net::HTTPMovedPermanently, Net::HTTPFound, Net::HTTPSeeOther, Net::HTTPTemporaryRedirect, Net::HTTPPermanentRedirect
+      raise RedirectLimitReached(uri) if redirects == 0
+      location = response['location']
+      if location
+        request_range(range, redirect_uri(location, uri), redirects - 1)
+      else
+        raise InvalidRequest.new(response.code, "Server at #{uri} replied with a #{response.code}, indicating redirection; however, the location header was empty.")
+      end
+    when Net::HTTPRangeNotSatisfiable
       # We return `nil` if we tried to read past the end of the IO,
       # which satisfies the Ruby IO convention. The caller should deal with `nil` being the result of a read()
       # S3 will also handily _not_ supply us with the Content-Range of the actual resource, so we
       # cannot hint size with this response - at lease not when working with S3
-
-    when
-      Measurometer.increment_counter('format_parser.
-      raise IntermittentFailure.new(response.
+      nil
+    when Net::HTTPServerError
+      Measurometer.increment_counter('format_parser.remote_io.upstream50x_errors', 1)
+      raise IntermittentFailure.new(response.code, "Server at #{uri} replied with a #{response.code} and we might want to retry")
     else
-      Measurometer.increment_counter('format_parser.
-      raise InvalidRequest.new(response.
+      Measurometer.increment_counter('format_parser.remote_io.invalid_request_errors', 1)
+      raise InvalidRequest.new(response.code, "Server at #{uri} replied with a #{response.code} and refused our request")
     end
   end
 end
data/lib/string.rb
ADDED
data/spec/attributes_json_spec.rb
CHANGED
@@ -106,9 +106,6 @@ describe FormatParser::AttributesJSON
       struct: Struct.new(:key).new('Value'),
       content: "\x01\xFF\xFEb\x00i\x00r\x00d\x00s\x00 \x005\x00 \x00m\x00o\x00r\x00e\x00 \x00c\x00o\x00m\x00p\x00".b
     }
-    expect {
-      JSON.pretty_generate(nasty_hash) # Should not raise an error
-    }.to raise_error(Encoding::UndefinedConversionError)

     anon_class = Struct.new(:evil)
     anon_class.include FormatParser::AttributesJSON
data/spec/parsers/arw_parser_spec.rb
ADDED
@@ -0,0 +1,119 @@
+require 'spec_helper'
+
+describe FormatParser::ARWParser do
+  shared_examples 'likely_match for file' do |filename_with_extension|
+    it "matches '#{filename_with_extension}'" do
+      expect(subject.likely_match?(filename_with_extension)).to be_truthy
+    end
+  end
+
+  shared_examples 'no likely_match for file' do |filename_with_extension|
+    it "does not match '#{filename_with_extension}'" do
+      expect(subject.likely_match?(filename_with_extension)).to be_falsey
+    end
+  end
+
+  describe 'likely_match' do
+    filenames = ['raw_file', 'another raw file', 'and.another', 'one-more']
+    valid_extensions = ['.arw', '.Arw', '.aRw', '.arW', '.ARw', '.ArW', '.aRW', '.ARW']
+    invalid_extensions = ['.tiff', '.cr2', '.new', '.jpeg']
+    filenames.each do |filename|
+      valid_extensions.each do |extension|
+        include_examples 'likely_match for file', filename + extension
+      end
+      invalid_extensions.each do |extension|
+        include_examples 'no likely_match for file', filename + extension
+      end
+    end
+  end
+
+  describe 'parses Sony ARW fixtures as arw format file' do
+    expected_parsed_dimensions = {
+      'RAW_SONY_A100.ARW' => {
+        width_px: 3872,
+        height_px: 2592,
+        display_width_px: 3872,
+        display_height_px: 2592,
+        orientation: :top_left
+      },
+      'RAW_SONY_A700.ARW' => {
+        width_px: 4288,
+        height_px: 2856,
+        display_width_px: 4288,
+        display_height_px: 2856,
+        orientation: :top_left
+      },
+      'RAW_SONY_A900.ARW' => {
+        width_px: 6080,
+        height_px: 4048,
+        display_width_px: 6080,
+        display_height_px: 4048,
+        orientation: :top_left
+      },
+      # rotated 90 degree image
+      'RAW_SONY_DSC-RX100M2.ARW' => {
+        width_px: 5472,
+        height_px: 3648,
+        display_width_px: 3648,
+        display_height_px: 5472,
+        orientation: :right_top,
+      },
+      'RAW_SONY_ILCE-7RM2.ARW' => {
+        width_px: 7952,
+        height_px: 5304,
+        display_width_px: 7952,
+        display_height_px: 5304,
+        orientation: :top_left,
+      },
+      'RAW_SONY_NEX7.ARW' => {
+        width_px: 6000,
+        height_px: 4000,
+        display_width_px: 6000,
+        display_height_px: 4000,
+        orientation: :top_left,
+      },
+      'RAW_SONY_SLTA55V.ARW' => {
+        width_px: 4928,
+        height_px: 3280,
+        display_width_px: 4928,
+        display_height_px: 3280,
+        orientation: :top_left,
+      },
+    }
+
+    Dir.glob(fixtures_dir + '/ARW/*.ARW').each do |arw_path|
+      it "is able to parse #{File.basename(arw_path)}" do
+        expected_dimension = expected_parsed_dimensions[File.basename(arw_path)]
+        # error if a new .arw test file is added without specifying the expected dimensions
+        expect(expected_dimension).not_to be_nil
+
+        parsed = subject.call(File.open(arw_path, 'rb'))
+        expect(parsed).not_to be_nil
+        expect(parsed.nature).to eq(:image)
+        expect(parsed.format).to eq(:arw)
+        expect(parsed.intrinsics[:exif]).not_to be_nil
+        expect(parsed.content_type).to eq('image/x-sony-arw')
+
+        expect(parsed.width_px).to eq(expected_dimension[:width_px])
+        expect(parsed.height_px).to eq(expected_dimension[:height_px])
+        expect(parsed.display_width_px).to eq(expected_dimension[:display_width_px])
+        expect(parsed.display_height_px).to eq(expected_dimension[:display_height_px])
+        expect(parsed.orientation).to eq(expected_dimension[:orientation])
+      end
+    end
+
+    shared_examples 'invalid filetype' do |filetype, fixture_path|
+      it "should fail to parse #{filetype}" do
+        file_path = fixtures_dir + fixture_path
+        parsed = subject.call(File.open(file_path, 'rb'))
+        expect(parsed).to be_nil
+      end
+    end
+
+    include_examples 'invalid filetype', 'NEF', '/NEF/RAW_NIKON_1S2.NEF'
+    include_examples 'invalid filetype', 'TIFF', '/TIFF/Shinbutsureijoushuincho.tiff'
+    include_examples 'invalid filetype', 'JPG', '/JPEG/orient_6.jpg'
+    include_examples 'invalid filetype', 'PNG', '/PNG/cat.png'
+    include_examples 'invalid filetype', 'CR2', '/CR2/RAW_CANON_1DM2.CR2'
+  end
+end
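The spec above drives FormatParser::ARWParser#call directly; through the gem's top-level FormatParser.parse the same fixtures would surface roughly as below (a sketch: the fixture name and the use of the top-level API are assumptions, while the attribute names and values are the ones asserted in the spec):

  require 'format_parser'

  result = FormatParser.parse(File.open('RAW_SONY_NEX7.ARW', 'rb'))
  result.nature                                            # => :image
  result.format                                            # => :arw
  result.content_type                                      # => 'image/x-sony-arw'
  [result.width_px, result.height_px, result.orientation]  # => [6000, 4000, :top_left]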
data/spec/parsers/tiff_parser_spec.rb
CHANGED
@@ -47,21 +47,6 @@ describe FormatParser::TIFFParser do
     expect(parsed.intrinsics[:exif]).not_to be_nil
   end

-  it 'parses Sony ARW fixture as arw format file' do
-    arw_path = fixtures_dir + '/ARW/RAW_SONY_ILCE-7RM2.ARW'
-
-    parsed = subject.call(File.open(arw_path, 'rb'))
-
-    expect(parsed).not_to be_nil
-    expect(parsed.nature).to eq(:image)
-    expect(parsed.format).to eq(:arw)
-
-    expect(parsed.width_px).to eq(7952)
-    expect(parsed.height_px).to eq(5304)
-    expect(parsed.intrinsics[:exif]).not_to be_nil
-    expect(parsed.content_type).to eq('image/x-sony-arw')
-  end
-
   describe 'correctly extracts dimensions from various TIFF flavors of the same file' do
     Dir.glob(fixtures_dir + '/TIFF/IMG_9266*.tif').each do |tiff_path|
       it "is able to parse #{File.basename(tiff_path)}" do
@@ -100,4 +85,13 @@ describe FormatParser::TIFFParser do
       end
     end
   end
+
+  describe 'bails out on ARW files, such as' do
+    Dir.glob(fixtures_dir + '/ARW/*.ARW').each do |arw_path|
+      it "skips #{File.basename(arw_path)}" do
+        parsed = subject.call(File.open(arw_path, 'rb'))
+        expect(parsed).to be_nil
+      end
+    end
+  end
 end
data/spec/remote_fetching_spec.rb
CHANGED
@@ -124,14 +124,9 @@ describe 'Fetching data from HTTP remotes' do
   end

   it 'sends provided HTTP headers in the request' do
-
-
-
-    FormatParser.parse_http('invalid_url') rescue nil
-
-    expect(Faraday)
-      .to receive(:new)
-      .with(headers: {'test-header' => 'test-value'})
+    expect_any_instance_of(Net::HTTP)
+      .to receive(:request_get)
+      .with(anything, a_hash_including('test-header' => 'test-value'))
       .and_call_original

     file_information = FormatParser.parse_http(
|