format_parser 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/.travis.yml +1 -0
- data/README.md +14 -11
- data/format_parser.gemspec +11 -10
- data/lib/care.rb +9 -17
- data/lib/format_parser.rb +11 -13
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_constraint.rb +3 -3
- data/lib/io_utils.rb +4 -10
- data/lib/parsers/aiff_parser.rb +9 -10
- data/lib/parsers/dpx_parser.rb +42 -42
- data/lib/parsers/dsl.rb +2 -2
- data/lib/parsers/exif_parser.rb +3 -8
- data/lib/parsers/fdx_parser.rb +3 -3
- data/lib/parsers/gif_parser.rb +3 -5
- data/lib/parsers/jpeg_parser.rb +4 -8
- data/lib/parsers/moov_parser.rb +8 -6
- data/lib/parsers/moov_parser/decoder.rb +105 -122
- data/lib/parsers/mp3_parser.rb +36 -46
- data/lib/parsers/mp3_parser/id3_v1.rb +7 -13
- data/lib/parsers/mp3_parser/id3_v2.rb +6 -6
- data/lib/parsers/png_parser.rb +5 -12
- data/lib/parsers/psd_parser.rb +2 -2
- data/lib/parsers/tiff_parser.rb +10 -12
- data/lib/parsers/wav_parser.rb +3 -3
- data/lib/read_limiter.rb +3 -7
- data/lib/remote_io.rb +3 -6
- data/spec/care_spec.rb +10 -10
- data/spec/file_information_spec.rb +1 -3
- data/spec/format_parser_spec.rb +6 -6
- data/spec/io_utils_spec.rb +7 -7
- data/spec/parsers/exif_parser_spec.rb +2 -3
- data/spec/parsers/gif_parser_spec.rb +1 -1
- data/spec/parsers/jpeg_parser_spec.rb +0 -1
- data/spec/parsers/moov_parser_spec.rb +2 -3
- data/spec/parsers/png_parser_spec.rb +1 -1
- data/spec/parsers/tiff_parser_spec.rb +0 -1
- data/spec/parsers/wav_parser_spec.rb +3 -3
- data/spec/read_limiter_spec.rb +0 -1
- data/spec/remote_fetching_spec.rb +34 -20
- data/spec/remote_io_spec.rb +20 -21
- data/spec/spec_helper.rb +2 -2
- metadata +19 -4
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -37,12 +37,12 @@ class FormatParser::MP3Parser
|
|
37
37
|
ignore_bytes_at_tail = id3_v1 ? 128 : 0
|
38
38
|
ignore_bytes_at_head = id3_v2 ? io.pos : 0
|
39
39
|
bytes_used_by_frames = io.size - ignore_bytes_at_tail - ignore_bytes_at_tail
|
40
|
-
|
40
|
+
|
41
41
|
io.seek(ignore_bytes_at_head)
|
42
42
|
|
43
43
|
maybe_xing_header, initial_frames = parse_mpeg_frames(io)
|
44
44
|
|
45
|
-
return
|
45
|
+
return if initial_frames.empty?
|
46
46
|
|
47
47
|
first_frame = initial_frames.first
|
48
48
|
|
@@ -63,14 +63,14 @@ class FormatParser::MP3Parser
|
|
63
63
|
|
64
64
|
if maybe_xing_header
|
65
65
|
duration = maybe_xing_header.frames * SAMPLES_PER_FRAME / first_frame.sample_rate.to_f
|
66
|
-
|
66
|
+
_bit_rate = maybe_xing_header.byte_count * 8 / duration / 1000
|
67
67
|
file_info.media_duration_seconds = duration
|
68
68
|
return file_info
|
69
69
|
end
|
70
70
|
|
71
71
|
# Estimate duration using the frames we did parse - to have an exact one
|
72
72
|
# we would need to have all the frames and thus read most of the file
|
73
|
-
|
73
|
+
_avg_bitrate = float_average_over(initial_frames, :frame_bitrate)
|
74
74
|
avg_frame_size = float_average_over(initial_frames, :frame_length)
|
75
75
|
avg_sample_rate = float_average_over(initial_frames, :sample_rate)
|
76
76
|
|
@@ -79,17 +79,17 @@ class FormatParser::MP3Parser
|
|
79
79
|
est_duration_seconds = est_samples / avg_sample_rate
|
80
80
|
|
81
81
|
file_info.media_duration_seconds = est_duration_seconds
|
82
|
-
|
82
|
+
file_info
|
83
83
|
end
|
84
84
|
|
85
85
|
private
|
86
|
-
|
86
|
+
|
87
87
|
# The implementation of the MPEG frames parsing is mostly based on tinytag,
|
88
88
|
# a sweet little Python library for parsing audio metadata - do check it out
|
89
89
|
# if you have a minute. https://pypi.python.org/pypi/tinytag
|
90
90
|
def parse_mpeg_frames(io)
|
91
91
|
mpeg_frames = []
|
92
|
-
|
92
|
+
|
93
93
|
MAX_FRAMES_TO_SCAN.times do |frame_i|
|
94
94
|
# Read through until we can latch onto the 11 sync bits. Read in 4-byte
|
95
95
|
# increments to save on read() calls
|
@@ -123,7 +123,7 @@ class FormatParser::MP3Parser
|
|
123
123
|
return [xing_header, mpeg_frames]
|
124
124
|
end
|
125
125
|
end
|
126
|
-
if frame_detail.frame_length > 1
|
126
|
+
if frame_detail.frame_length > 1 # jump over current frame body
|
127
127
|
io.seek(io.pos + frame_detail.frame_length - 4)
|
128
128
|
end
|
129
129
|
end
|
@@ -132,14 +132,14 @@ class FormatParser::MP3Parser
|
|
132
132
|
[nil, mpeg_frames]
|
133
133
|
end
|
134
134
|
|
135
|
-
def parse_mpeg_frame_header(offset_in_file,
|
135
|
+
def parse_mpeg_frame_header(offset_in_file, _sync, conf, bitrate_freq, rest)
|
136
136
|
# see this page for the magic values used in mp3:
|
137
137
|
# http://www.mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm
|
138
138
|
samplerates = [
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
139
|
+
[11025, 12000, 8000], # MPEG 2.5
|
140
|
+
[], # reserved
|
141
|
+
[22050, 24000, 16000], # MPEG 2
|
142
|
+
[44100, 48000, 32000], # MPEG 1
|
143
143
|
]
|
144
144
|
v1l1 = [0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0]
|
145
145
|
v1l2 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0]
|
@@ -148,19 +148,19 @@ class FormatParser::MP3Parser
|
|
148
148
|
v2l2 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0]
|
149
149
|
v2l3 = v2l2
|
150
150
|
bitrate_by_version_by_layer = [
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
151
|
+
[nil, v2l3, v2l2, v2l1], # MPEG Version 2.5 # note that the layers go
|
152
|
+
nil, # reserved # from 3 to 1 by design.
|
153
|
+
[nil, v2l3, v2l2, v2l1], # MPEG Version 2 # the first layer id is
|
154
|
+
[nil, v1l3, v1l2, v1l1], # MPEG Version 1 # reserved
|
155
155
|
]
|
156
|
-
|
156
|
+
|
157
157
|
channels_per_channel_mode = [
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
158
|
+
2, # 00 Stereo
|
159
|
+
2, # 01 Joint stereo (Stereo)
|
160
|
+
2, # 10 Dual channel (2 mono channels)
|
161
|
+
1, # 11 Single channel (Mono)
|
162
162
|
]
|
163
|
-
|
163
|
+
|
164
164
|
br_id = (bitrate_freq >> 4) & 0x0F # biterate id
|
165
165
|
sr_id = (bitrate_freq >> 2) & 0x03 # sample rate id
|
166
166
|
padding = bitrate_freq & 0x02 > 0 ? 1 : 0
|
@@ -170,7 +170,7 @@ class FormatParser::MP3Parser
|
|
170
170
|
channels = channels_per_channel_mode.fetch(channel_mode)
|
171
171
|
sample_rate = deep_fetch(samplerates, mpeg_id, sr_id)
|
172
172
|
frame_bitrate = deep_fetch(bitrate_by_version_by_layer, mpeg_id, layer_id, br_id)
|
173
|
-
frame_length = (
|
173
|
+
frame_length = (144_000 * frame_bitrate) / sample_rate + padding
|
174
174
|
MPEGFrame.new(
|
175
175
|
offset_in_file: offset_in_file,
|
176
176
|
mpeg_id: mpeg_id,
|
@@ -186,17 +186,15 @@ class FormatParser::MP3Parser
|
|
186
186
|
# or whether there is the 0xFF byte at the end
|
187
187
|
def sync_bytes_offset_in_4_byte_seq(four_bytes)
|
188
188
|
four_bytes[0...3].each_with_index do |byte, i|
|
189
|
-
next_byte = four_bytes[i+1]
|
190
|
-
if byte == 0xFF && next_byte > 0xE0
|
191
|
-
return i
|
192
|
-
end
|
189
|
+
next_byte = four_bytes[i + 1]
|
190
|
+
return i if byte == 0xFF && next_byte > 0xE0
|
193
191
|
end
|
194
192
|
four_bytes[-1] == 0xFF ? 3 : 4
|
195
193
|
end
|
196
194
|
|
197
195
|
def attempt_xing_header(frame_body)
|
198
|
-
unless xing_offset = frame_body.index(
|
199
|
-
return
|
196
|
+
unless xing_offset = frame_body.index('Xing')
|
197
|
+
return # No Xing in this frame
|
200
198
|
end
|
201
199
|
|
202
200
|
io = StringIO.new(frame_body)
|
@@ -205,32 +203,24 @@ class FormatParser::MP3Parser
|
|
205
203
|
# https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
|
206
204
|
header_flags, _ = io.read(4).unpack('s>s>')
|
207
205
|
frames = byte_count = toc = vbr_scale = nil
|
208
|
-
|
209
|
-
if header_flags & 1 # FRAMES FLAG
|
210
|
-
frames = io.read(4).unpack('N1').first
|
211
|
-
end
|
212
206
|
|
213
|
-
if header_flags &
|
214
|
-
byte_count = io.read(4).unpack('N1').first
|
215
|
-
end
|
207
|
+
frames = io.read(4).unpack('N1').first if header_flags & 1 # FRAMES FLAG
|
216
208
|
|
217
|
-
if header_flags &
|
218
|
-
toc = io.read(100).unpack('C100')
|
219
|
-
end
|
209
|
+
byte_count = io.read(4).unpack('N1').first if header_flags & 2 # BYTES FLAG
|
220
210
|
|
221
|
-
if header_flags &
|
222
|
-
|
223
|
-
|
211
|
+
toc = io.read(100).unpack('C100') if header_flags & 4 # TOC FLAG
|
212
|
+
|
213
|
+
vbr_scale = io.read(4).unpack('N1').first if header_flags & 8 # VBR SCALE FLAG
|
224
214
|
|
225
215
|
VBRHeader.new(frames: frames, byte_count: byte_count, toc_entries: toc, vbr_scale: vbr_scale)
|
226
216
|
end
|
227
217
|
|
228
|
-
def average_bytes_and_bitrate(
|
218
|
+
def average_bytes_and_bitrate(_mpeg_frames)
|
229
219
|
avg_bytes_per_frame = initial_frames.map(&:frame_length).inject(&:+) / initial_frames.length.to_f
|
230
220
|
avg_bitrate_per_frame = initial_frames.map(&:frame_bitrate).inject(&:+) / initial_frames.length.to_f
|
231
221
|
[avg_bytes_per_frame, avg_bitrate_per_frame]
|
232
222
|
end
|
233
|
-
|
223
|
+
|
234
224
|
def xing_header_usable_for_duration?(xing_header)
|
235
225
|
xing_header && xing_header.frames && xing_header.byte_count && xing_header.vbr_scale
|
236
226
|
end
|
@@ -241,7 +231,7 @@ class FormatParser::MP3Parser
|
|
241
231
|
|
242
232
|
def deep_fetch(from, *keys)
|
243
233
|
keys.inject(from) { |receiver, key_or_idx| receiver.fetch(key_or_idx) }
|
244
|
-
rescue
|
234
|
+
rescue IndexError, NoMethodError
|
245
235
|
raise InvalidDeepFetch, "Could not retrieve #{keys.inspect} from #{from.inspect}"
|
246
236
|
end
|
247
237
|
|
@@ -8,40 +8,34 @@ module FormatParser::MP3Parser::ID3V1
|
|
8
8
|
:comment, :a30,
|
9
9
|
:genre, :C,
|
10
10
|
]
|
11
|
-
packspec_keys = PACKSPEC.select.with_index{|_, i| i.even? }
|
11
|
+
packspec_keys = PACKSPEC.select.with_index { |_, i| i.even? }
|
12
12
|
TAG_SIZE_BYTES = 128
|
13
13
|
|
14
14
|
class TagInformation < Struct.new(*packspec_keys)
|
15
15
|
end
|
16
16
|
|
17
17
|
def attempt_id3_v1_extraction(io)
|
18
|
-
if io.size < TAG_SIZE_BYTES # Won't fit the ID3v1 regardless
|
19
|
-
return nil
|
20
|
-
end
|
18
|
+
return if io.size < TAG_SIZE_BYTES # Won't fit the ID3v1 regardless
|
21
19
|
|
22
20
|
io.seek(io.size - 128)
|
23
21
|
trailer_bytes = io.read(128)
|
24
22
|
|
25
|
-
unless trailer_bytes && trailer_bytes.byteslice(0, 3) == 'TAG'
|
26
|
-
return nil
|
27
|
-
end
|
23
|
+
return unless trailer_bytes && trailer_bytes.byteslice(0, 3) == 'TAG'
|
28
24
|
|
29
25
|
id3_v1 = parse_id3_v1(trailer_bytes)
|
30
26
|
|
31
27
|
# If all of the resulting strings are empty this ID3v1 tag is invalid and
|
32
28
|
# we should ignore it.
|
33
|
-
strings_from_id3v1 = id3_v1.values.select{|e| e.is_a?(String) && e != 'TAG' }
|
34
|
-
if strings_from_id3v1.all?(&:empty?)
|
35
|
-
return nil
|
36
|
-
end
|
29
|
+
strings_from_id3v1 = id3_v1.values.select { |e| e.is_a?(String) && e != 'TAG' }
|
30
|
+
return if strings_from_id3v1.all?(&:empty?)
|
37
31
|
|
38
32
|
id3_v1
|
39
33
|
end
|
40
34
|
|
41
35
|
def parse_id3_v1(byte_str)
|
42
|
-
|
36
|
+
_keys, values = PACKSPEC.partition.with_index { |_, i| i.even? }
|
43
37
|
unpacked_values = byte_str.unpack(values.join)
|
44
|
-
unpacked_values.map! {|e| e.is_a?(String) ? trim_id3v1_string(e) : e }
|
38
|
+
unpacked_values.map! { |e| e.is_a?(String) ? trim_id3v1_string(e) : e }
|
45
39
|
TagInformation.new(unpacked_values)
|
46
40
|
end
|
47
41
|
|
@@ -2,16 +2,16 @@ module FormatParser::MP3Parser::ID3V2
|
|
2
2
|
def attempt_id3_v2_extraction(io)
|
3
3
|
io.seek(0) # Only support header ID3v2
|
4
4
|
header_bytes = io.read(10)
|
5
|
-
return
|
5
|
+
return unless header_bytes
|
6
6
|
|
7
7
|
header = parse_id3_v2_header(header_bytes)
|
8
|
-
return
|
9
|
-
return
|
8
|
+
return unless header[:tag] == 'ID3'
|
9
|
+
return unless header[:size] > 0
|
10
10
|
|
11
11
|
header_tag_payload = io.read(header[:size])
|
12
12
|
header_tag_payload = StringIO.new(header_tag_payload)
|
13
13
|
|
14
|
-
return
|
14
|
+
return unless header_tag_payload.size == header[:size]
|
15
15
|
|
16
16
|
frames = []
|
17
17
|
loop do
|
@@ -38,10 +38,10 @@ module FormatParser::MP3Parser::ID3V2
|
|
38
38
|
:flags, :C1,
|
39
39
|
:size, :a4,
|
40
40
|
]
|
41
|
-
keys, values = packspec.partition.with_index {|_, i| i.even? }
|
41
|
+
keys, values = packspec.partition.with_index { |_, i| i.even? }
|
42
42
|
unpacked_values = byte_str.unpack(values.join)
|
43
43
|
header_data = Hash[keys.zip(unpacked_values)]
|
44
|
-
|
44
|
+
|
45
45
|
header_data[:version] = header_data[:version].unpack('C2')
|
46
46
|
header_data[:size] = decode_syncsafe_int(header_data[:size])
|
47
47
|
|
data/lib/parsers/png_parser.rb
CHANGED
@@ -19,9 +19,8 @@ class FormatParser::PNGParser
|
|
19
19
|
6 => true,
|
20
20
|
}
|
21
21
|
|
22
|
-
|
23
22
|
def chunk_length_and_type(io)
|
24
|
-
safe_read(io, 8).unpack(
|
23
|
+
safe_read(io, 8).unpack('Na4')
|
25
24
|
end
|
26
25
|
|
27
26
|
def call(io)
|
@@ -36,7 +35,7 @@ class FormatParser::PNGParser
|
|
36
35
|
# correct length as well.
|
37
36
|
# IHDR _must_ come first, no exceptions. If it doesn't
|
38
37
|
# we should not consider this a valid PNG.
|
39
|
-
return unless chunk_type ==
|
38
|
+
return unless chunk_type == 'IHDR' && chunk_length == 13
|
40
39
|
|
41
40
|
chunk_data = safe_read(io, chunk_length)
|
42
41
|
# Width: 4 bytes
|
@@ -46,8 +45,8 @@ class FormatParser::PNGParser
|
|
46
45
|
# Compression method: 1 byte
|
47
46
|
# Filter method: 1 byte
|
48
47
|
# Interlace method: 1 byte
|
49
|
-
w, h,
|
50
|
-
|
48
|
+
w, h, _bit_depth, color_type, _compression_method,
|
49
|
+
_filter_method, _interlace_method = chunk_data.unpack('N2C5')
|
51
50
|
|
52
51
|
color_mode = COLOR_TYPES.fetch(color_type)
|
53
52
|
has_transparency = TRANSPARENCY_PER_COLOR_TYPE[color_type]
|
@@ -56,18 +55,12 @@ class FormatParser::PNGParser
|
|
56
55
|
# we are dealing with an APNG.
|
57
56
|
safe_skip(io, 4)
|
58
57
|
|
59
|
-
# dry-validation won't let booleans be filled with nil so we have to set
|
60
|
-
# has_animation to false by default
|
61
|
-
has_animation = nil
|
62
|
-
num_frames = nil
|
63
|
-
loop_n_times = nil
|
64
|
-
|
65
58
|
chunk_length, chunk_type = chunk_length_and_type(io)
|
66
59
|
if chunk_length == 8 && chunk_type == 'acTL'
|
67
60
|
# https://wiki.mozilla.org/APNG_Specification#.60acTL.60:_The_Animation_Control_Chunk
|
68
61
|
# Unlike GIF, we do have the frame count that we can recover
|
69
62
|
has_animation = true
|
70
|
-
num_frames,
|
63
|
+
num_frames, _loop_n_times = safe_read(io, 8).unpack('NN')
|
71
64
|
end
|
72
65
|
|
73
66
|
FormatParser::Image.new(
|
data/lib/parsers/psd_parser.rb
CHANGED
@@ -8,13 +8,13 @@ class FormatParser::PSDParser
|
|
8
8
|
|
9
9
|
def call(io)
|
10
10
|
io = FormatParser::IOConstraint.new(io)
|
11
|
-
magic_bytes = safe_read(io, 4).unpack(
|
11
|
+
magic_bytes = safe_read(io, 4).unpack('C4')
|
12
12
|
|
13
13
|
return unless magic_bytes == PSD_HEADER
|
14
14
|
|
15
15
|
# We can be reasonably certain this is a PSD so we grab the height
|
16
16
|
# and width bytes
|
17
|
-
w,h = safe_read(io, 22).unpack(
|
17
|
+
w, h = safe_read(io, 22).unpack('x10N2')
|
18
18
|
FormatParser::Image.new(
|
19
19
|
format: :psd,
|
20
20
|
width_px: w,
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -12,30 +12,28 @@ class FormatParser::TIFFParser
|
|
12
12
|
|
13
13
|
def call(io)
|
14
14
|
io = FormatParser::IOConstraint.new(io)
|
15
|
-
magic_bytes = safe_read(io, 4).unpack(
|
15
|
+
magic_bytes = safe_read(io, 4).unpack('C4')
|
16
16
|
endianness = scan_tiff_endianness(magic_bytes)
|
17
17
|
return unless endianness
|
18
18
|
w, h = read_tiff_by_endianness(io, endianness)
|
19
19
|
scanner = FormatParser::EXIFParser.new(:tiff, io)
|
20
20
|
scanner.scan_image_exif
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
21
|
+
FormatParser::Image.new(
|
22
|
+
format: :tif,
|
23
|
+
width_px: w,
|
24
|
+
height_px: h,
|
25
|
+
# might be nil if EXIF metadata wasn't found
|
26
|
+
orientation: scanner.orientation
|
27
|
+
)
|
28
28
|
end
|
29
29
|
|
30
30
|
# TIFFs can be either big or little endian, so we check here
|
31
31
|
# and set our unpack method argument to suit.
|
32
32
|
def scan_tiff_endianness(magic_bytes)
|
33
33
|
if magic_bytes == LITTLE_ENDIAN_TIFF_HEADER_BYTES
|
34
|
-
|
34
|
+
'v'
|
35
35
|
elsif magic_bytes == BIG_ENDIAN_TIFF_HEADER_BYTES
|
36
|
-
|
37
|
-
else
|
38
|
-
nil
|
36
|
+
'n'
|
39
37
|
end
|
40
38
|
end
|
41
39
|
|
data/lib/parsers/wav_parser.rb
CHANGED
@@ -6,10 +6,10 @@ class FormatParser::WAVParser
|
|
6
6
|
formats :wav
|
7
7
|
|
8
8
|
def call(io)
|
9
|
-
# Read the RIFF header. Chunk descriptor should be RIFF, the size should
|
10
|
-
# contain the size of the entire file in bytes minus 8 bytes for the
|
9
|
+
# Read the RIFF header. Chunk descriptor should be RIFF, the size should
|
10
|
+
# contain the size of the entire file in bytes minus 8 bytes for the
|
11
11
|
# two fields not included in this count: chunk_id and size.
|
12
|
-
chunk_id,
|
12
|
+
chunk_id, _size, riff_type = safe_read(io, 12).unpack('a4la4')
|
13
13
|
|
14
14
|
# The chunk_id and riff_type should be RIFF and WAVE respectively
|
15
15
|
return unless chunk_id == 'RIFF' && riff_type == 'WAVE'
|
data/lib/read_limiter.rb
CHANGED
@@ -26,25 +26,21 @@ class FormatParser::ReadLimiter
|
|
26
26
|
def seek(to_offset)
|
27
27
|
@seeks += 1
|
28
28
|
if @max_seeks && @seeks > @max_seeks
|
29
|
-
raise BudgetExceeded,
|
29
|
+
raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks
|
30
30
|
end
|
31
31
|
@io.seek(to_offset)
|
32
32
|
end
|
33
33
|
|
34
|
-
def size
|
35
|
-
@io.size
|
36
|
-
end
|
37
|
-
|
38
34
|
def read(n)
|
39
35
|
@bytes += n
|
40
36
|
@reads += 1
|
41
37
|
|
42
38
|
if @max_bytes && @bytes > @max_bytes
|
43
|
-
raise BudgetExceeded,
|
39
|
+
raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes
|
44
40
|
end
|
45
41
|
|
46
42
|
if @max_reads && @reads > @max_reads
|
47
|
-
raise BudgetExceeded,
|
43
|
+
raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads
|
48
44
|
end
|
49
45
|
|
50
46
|
@io.read(n)
|
data/lib/remote_io.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
class FormatParser::RemoteIO
|
2
|
-
|
3
2
|
# Represents a failure that might be retried
|
4
3
|
# (like a 5xx response or a timeout)
|
5
4
|
class IntermittentFailure < StandardError
|
@@ -33,7 +32,7 @@ class FormatParser::RemoteIO
|
|
33
32
|
#
|
34
33
|
# @return [Integer] the size of the remote resource
|
35
34
|
def size
|
36
|
-
raise
|
35
|
+
raise 'Remote size not yet obtained, need to perform at least one read() to retrieve it' unless @remote_size
|
37
36
|
@remote_size
|
38
37
|
end
|
39
38
|
|
@@ -52,8 +51,6 @@ class FormatParser::RemoteIO
|
|
52
51
|
@remote_size = maybe_size
|
53
52
|
@pos += maybe_body.bytesize
|
54
53
|
maybe_body.force_encoding(Encoding::ASCII_8BIT)
|
55
|
-
else
|
56
|
-
nil
|
57
54
|
end
|
58
55
|
end
|
59
56
|
|
@@ -67,7 +64,7 @@ class FormatParser::RemoteIO
|
|
67
64
|
# We use a GET and not a HEAD request followed by a GET because
|
68
65
|
# S3 does not allow HEAD requests if you only presigned your URL for GETs, so we
|
69
66
|
# combine the first GET of a segment and retrieving the size of the resource
|
70
|
-
response = Faraday.get(@uri, nil, range:
|
67
|
+
response = Faraday.get(@uri, nil, range: 'bytes=%d-%d' % [range.begin, range.end])
|
71
68
|
|
72
69
|
case response.status
|
73
70
|
when 200, 206
|
@@ -90,7 +87,7 @@ class FormatParser::RemoteIO
|
|
90
87
|
# which satisfies the Ruby IO convention. The caller should deal with `nil` being the result of a read()
|
91
88
|
# S3 will also handily _not_ supply us with the Content-Range of the actual resource, so we
|
92
89
|
# cannot hint size with this response - at least not when working with S3
|
93
|
-
return
|
90
|
+
return
|
94
91
|
when 500..599
|
95
92
|
raise IntermittentFailure, "Server at #{@uri} replied with a #{response.status} and we might want to retry"
|
96
93
|
else
|