format_parser 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/.travis.yml +1 -0
- data/README.md +14 -11
- data/format_parser.gemspec +11 -10
- data/lib/care.rb +9 -17
- data/lib/format_parser.rb +11 -13
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_constraint.rb +3 -3
- data/lib/io_utils.rb +4 -10
- data/lib/parsers/aiff_parser.rb +9 -10
- data/lib/parsers/dpx_parser.rb +42 -42
- data/lib/parsers/dsl.rb +2 -2
- data/lib/parsers/exif_parser.rb +3 -8
- data/lib/parsers/fdx_parser.rb +3 -3
- data/lib/parsers/gif_parser.rb +3 -5
- data/lib/parsers/jpeg_parser.rb +4 -8
- data/lib/parsers/moov_parser.rb +8 -6
- data/lib/parsers/moov_parser/decoder.rb +105 -122
- data/lib/parsers/mp3_parser.rb +36 -46
- data/lib/parsers/mp3_parser/id3_v1.rb +7 -13
- data/lib/parsers/mp3_parser/id3_v2.rb +6 -6
- data/lib/parsers/png_parser.rb +5 -12
- data/lib/parsers/psd_parser.rb +2 -2
- data/lib/parsers/tiff_parser.rb +10 -12
- data/lib/parsers/wav_parser.rb +3 -3
- data/lib/read_limiter.rb +3 -7
- data/lib/remote_io.rb +3 -6
- data/spec/care_spec.rb +10 -10
- data/spec/file_information_spec.rb +1 -3
- data/spec/format_parser_spec.rb +6 -6
- data/spec/io_utils_spec.rb +7 -7
- data/spec/parsers/exif_parser_spec.rb +2 -3
- data/spec/parsers/gif_parser_spec.rb +1 -1
- data/spec/parsers/jpeg_parser_spec.rb +0 -1
- data/spec/parsers/moov_parser_spec.rb +2 -3
- data/spec/parsers/png_parser_spec.rb +1 -1
- data/spec/parsers/tiff_parser_spec.rb +0 -1
- data/spec/parsers/wav_parser_spec.rb +3 -3
- data/spec/read_limiter_spec.rb +0 -1
- data/spec/remote_fetching_spec.rb +34 -20
- data/spec/remote_io_spec.rb +20 -21
- data/spec/spec_helper.rb +2 -2
- metadata +19 -4
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -37,12 +37,12 @@ class FormatParser::MP3Parser
|
|
37
37
|
ignore_bytes_at_tail = id3_v1 ? 128 : 0
|
38
38
|
ignore_bytes_at_head = id3_v2 ? io.pos : 0
|
39
39
|
bytes_used_by_frames = io.size - ignore_bytes_at_tail - ignore_bytes_at_tail
|
40
|
-
|
40
|
+
|
41
41
|
io.seek(ignore_bytes_at_head)
|
42
42
|
|
43
43
|
maybe_xing_header, initial_frames = parse_mpeg_frames(io)
|
44
44
|
|
45
|
-
return
|
45
|
+
return if initial_frames.empty?
|
46
46
|
|
47
47
|
first_frame = initial_frames.first
|
48
48
|
|
@@ -63,14 +63,14 @@ class FormatParser::MP3Parser
|
|
63
63
|
|
64
64
|
if maybe_xing_header
|
65
65
|
duration = maybe_xing_header.frames * SAMPLES_PER_FRAME / first_frame.sample_rate.to_f
|
66
|
-
|
66
|
+
_bit_rate = maybe_xing_header.byte_count * 8 / duration / 1000
|
67
67
|
file_info.media_duration_seconds = duration
|
68
68
|
return file_info
|
69
69
|
end
|
70
70
|
|
71
71
|
# Estimate duration using the frames we did parse - to have an exact one
|
72
72
|
# we would need to have all the frames and thus read most of the file
|
73
|
-
|
73
|
+
_avg_bitrate = float_average_over(initial_frames, :frame_bitrate)
|
74
74
|
avg_frame_size = float_average_over(initial_frames, :frame_length)
|
75
75
|
avg_sample_rate = float_average_over(initial_frames, :sample_rate)
|
76
76
|
|
@@ -79,17 +79,17 @@ class FormatParser::MP3Parser
|
|
79
79
|
est_duration_seconds = est_samples / avg_sample_rate
|
80
80
|
|
81
81
|
file_info.media_duration_seconds = est_duration_seconds
|
82
|
-
|
82
|
+
file_info
|
83
83
|
end
|
84
84
|
|
85
85
|
private
|
86
|
-
|
86
|
+
|
87
87
|
# The implementation of the MPEG frames parsing is mostly based on tinytag,
|
88
88
|
# a sweet little Python library for parsing audio metadata - do check it out
|
89
89
|
# if you have a minute. https://pypi.python.org/pypi/tinytag
|
90
90
|
def parse_mpeg_frames(io)
|
91
91
|
mpeg_frames = []
|
92
|
-
|
92
|
+
|
93
93
|
MAX_FRAMES_TO_SCAN.times do |frame_i|
|
94
94
|
# Read through until we can latch onto the 11 sync bits. Read in 4-byte
|
95
95
|
# increments to save on read() calls
|
@@ -123,7 +123,7 @@ class FormatParser::MP3Parser
|
|
123
123
|
return [xing_header, mpeg_frames]
|
124
124
|
end
|
125
125
|
end
|
126
|
-
if frame_detail.frame_length > 1
|
126
|
+
if frame_detail.frame_length > 1 # jump over current frame body
|
127
127
|
io.seek(io.pos + frame_detail.frame_length - 4)
|
128
128
|
end
|
129
129
|
end
|
@@ -132,14 +132,14 @@ class FormatParser::MP3Parser
|
|
132
132
|
[nil, mpeg_frames]
|
133
133
|
end
|
134
134
|
|
135
|
-
def parse_mpeg_frame_header(offset_in_file,
|
135
|
+
def parse_mpeg_frame_header(offset_in_file, _sync, conf, bitrate_freq, rest)
|
136
136
|
# see this page for the magic values used in mp3:
|
137
137
|
# http:/www.mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm
|
138
138
|
samplerates = [
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
139
|
+
[11025, 12000, 8000], # MPEG 2.5
|
140
|
+
[], # reserved
|
141
|
+
[22050, 24000, 16000], # MPEG 2
|
142
|
+
[44100, 48000, 32000], # MPEG 1
|
143
143
|
]
|
144
144
|
v1l1 = [0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0]
|
145
145
|
v1l2 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0]
|
@@ -148,19 +148,19 @@ class FormatParser::MP3Parser
|
|
148
148
|
v2l2 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0]
|
149
149
|
v2l3 = v2l2
|
150
150
|
bitrate_by_version_by_layer = [
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
151
|
+
[nil, v2l3, v2l2, v2l1], # MPEG Version 2.5 # note that the layers go
|
152
|
+
nil, # reserved # from 3 to 1 by design.
|
153
|
+
[nil, v2l3, v2l2, v2l1], # MPEG Version 2 # the first layer id is
|
154
|
+
[nil, v1l3, v1l2, v1l1], # MPEG Version 1 # reserved
|
155
155
|
]
|
156
|
-
|
156
|
+
|
157
157
|
channels_per_channel_mode = [
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
158
|
+
2, # 00 Stereo
|
159
|
+
2, # 01 Joint stereo (Stereo)
|
160
|
+
2, # 10 Dual channel (2 mono channels)
|
161
|
+
1, # 11 Single channel (Mono)
|
162
162
|
]
|
163
|
-
|
163
|
+
|
164
164
|
br_id = (bitrate_freq >> 4) & 0x0F # biterate id
|
165
165
|
sr_id = (bitrate_freq >> 2) & 0x03 # sample rate id
|
166
166
|
padding = bitrate_freq & 0x02 > 0 ? 1 : 0
|
@@ -170,7 +170,7 @@ class FormatParser::MP3Parser
|
|
170
170
|
channels = channels_per_channel_mode.fetch(channel_mode)
|
171
171
|
sample_rate = deep_fetch(samplerates, mpeg_id, sr_id)
|
172
172
|
frame_bitrate = deep_fetch(bitrate_by_version_by_layer, mpeg_id, layer_id, br_id)
|
173
|
-
frame_length = (
|
173
|
+
frame_length = (144_000 * frame_bitrate) / sample_rate + padding
|
174
174
|
MPEGFrame.new(
|
175
175
|
offset_in_file: offset_in_file,
|
176
176
|
mpeg_id: mpeg_id,
|
@@ -186,17 +186,15 @@ class FormatParser::MP3Parser
|
|
186
186
|
# or whether there is the 0xFF byte at the end
|
187
187
|
def sync_bytes_offset_in_4_byte_seq(four_bytes)
|
188
188
|
four_bytes[0...3].each_with_index do |byte, i|
|
189
|
-
next_byte = four_bytes[i+1]
|
190
|
-
if byte == 0xFF && next_byte > 0xE0
|
191
|
-
return i
|
192
|
-
end
|
189
|
+
next_byte = four_bytes[i + 1]
|
190
|
+
return i if byte == 0xFF && next_byte > 0xE0
|
193
191
|
end
|
194
192
|
four_bytes[-1] == 0xFF ? 3 : 4
|
195
193
|
end
|
196
194
|
|
197
195
|
def attempt_xing_header(frame_body)
|
198
|
-
unless xing_offset = frame_body.index(
|
199
|
-
return
|
196
|
+
unless xing_offset = frame_body.index('Xing')
|
197
|
+
return # No Xing in this frame
|
200
198
|
end
|
201
199
|
|
202
200
|
io = StringIO.new(frame_body)
|
@@ -205,32 +203,24 @@ class FormatParser::MP3Parser
|
|
205
203
|
# https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
|
206
204
|
header_flags, _ = io.read(4).unpack('s>s>')
|
207
205
|
frames = byte_count = toc = vbr_scale = nil
|
208
|
-
|
209
|
-
if header_flags & 1 # FRAMES FLAG
|
210
|
-
frames = io.read(4).unpack('N1').first
|
211
|
-
end
|
212
206
|
|
213
|
-
if header_flags &
|
214
|
-
byte_count = io.read(4).unpack('N1').first
|
215
|
-
end
|
207
|
+
frames = io.read(4).unpack('N1').first if header_flags & 1 # FRAMES FLAG
|
216
208
|
|
217
|
-
if header_flags &
|
218
|
-
toc = io.read(100).unpack('C100')
|
219
|
-
end
|
209
|
+
byte_count = io.read(4).unpack('N1').first if header_flags & 2 # BYTES FLAG
|
220
210
|
|
221
|
-
if header_flags &
|
222
|
-
|
223
|
-
|
211
|
+
toc = io.read(100).unpack('C100') if header_flags & 4 # TOC FLAG
|
212
|
+
|
213
|
+
vbr_scale = io.read(4).unpack('N1').first if header_flags & 8 # VBR SCALE FLAG
|
224
214
|
|
225
215
|
VBRHeader.new(frames: frames, byte_count: byte_count, toc_entries: toc, vbr_scale: vbr_scale)
|
226
216
|
end
|
227
217
|
|
228
|
-
def average_bytes_and_bitrate(
|
218
|
+
def average_bytes_and_bitrate(_mpeg_frames)
|
229
219
|
avg_bytes_per_frame = initial_frames.map(&:frame_length).inject(&:+) / initial_frames.length.to_f
|
230
220
|
avg_bitrate_per_frame = initial_frames.map(&:frame_bitrate).inject(&:+) / initial_frames.length.to_f
|
231
221
|
[avg_bytes_per_frame, avg_bitrate_per_frame]
|
232
222
|
end
|
233
|
-
|
223
|
+
|
234
224
|
def xing_header_usable_for_duration?(xing_header)
|
235
225
|
xing_header && xing_header.frames && xing_header.byte_count && xing_header.vbr_scale
|
236
226
|
end
|
@@ -241,7 +231,7 @@ class FormatParser::MP3Parser
|
|
241
231
|
|
242
232
|
def deep_fetch(from, *keys)
|
243
233
|
keys.inject(from) { |receiver, key_or_idx| receiver.fetch(key_or_idx) }
|
244
|
-
rescue
|
234
|
+
rescue IndexError, NoMethodError
|
245
235
|
raise InvalidDeepFetch, "Could not retrieve #{keys.inspect} from #{from.inspect}"
|
246
236
|
end
|
247
237
|
|
@@ -8,40 +8,34 @@ module FormatParser::MP3Parser::ID3V1
|
|
8
8
|
:comment, :a30,
|
9
9
|
:genre, :C,
|
10
10
|
]
|
11
|
-
packspec_keys = PACKSPEC.select.with_index{|_, i| i.even? }
|
11
|
+
packspec_keys = PACKSPEC.select.with_index { |_, i| i.even? }
|
12
12
|
TAG_SIZE_BYTES = 128
|
13
13
|
|
14
14
|
class TagInformation < Struct.new(*packspec_keys)
|
15
15
|
end
|
16
16
|
|
17
17
|
def attempt_id3_v1_extraction(io)
|
18
|
-
if io.size < TAG_SIZE_BYTES # Won't fit the ID3v1 regardless
|
19
|
-
return nil
|
20
|
-
end
|
18
|
+
return if io.size < TAG_SIZE_BYTES # Won't fit the ID3v1 regardless
|
21
19
|
|
22
20
|
io.seek(io.size - 128)
|
23
21
|
trailer_bytes = io.read(128)
|
24
22
|
|
25
|
-
unless trailer_bytes && trailer_bytes.byteslice(0, 3) == 'TAG'
|
26
|
-
return nil
|
27
|
-
end
|
23
|
+
return unless trailer_bytes && trailer_bytes.byteslice(0, 3) == 'TAG'
|
28
24
|
|
29
25
|
id3_v1 = parse_id3_v1(trailer_bytes)
|
30
26
|
|
31
27
|
# If all of the resulting strings are empty this ID3v1 tag is invalid and
|
32
28
|
# we should ignore it.
|
33
|
-
strings_from_id3v1 = id3_v1.values.select{|e| e.is_a?(String) && e != 'TAG' }
|
34
|
-
if strings_from_id3v1.all?(&:empty?)
|
35
|
-
return nil
|
36
|
-
end
|
29
|
+
strings_from_id3v1 = id3_v1.values.select { |e| e.is_a?(String) && e != 'TAG' }
|
30
|
+
return if strings_from_id3v1.all?(&:empty?)
|
37
31
|
|
38
32
|
id3_v1
|
39
33
|
end
|
40
34
|
|
41
35
|
def parse_id3_v1(byte_str)
|
42
|
-
|
36
|
+
_keys, values = PACKSPEC.partition.with_index { |_, i| i.even? }
|
43
37
|
unpacked_values = byte_str.unpack(values.join)
|
44
|
-
unpacked_values.map! {|e| e.is_a?(String) ? trim_id3v1_string(e) : e }
|
38
|
+
unpacked_values.map! { |e| e.is_a?(String) ? trim_id3v1_string(e) : e }
|
45
39
|
TagInformation.new(unpacked_values)
|
46
40
|
end
|
47
41
|
|
@@ -2,16 +2,16 @@ module FormatParser::MP3Parser::ID3V2
|
|
2
2
|
def attempt_id3_v2_extraction(io)
|
3
3
|
io.seek(0) # Only support header ID3v2
|
4
4
|
header_bytes = io.read(10)
|
5
|
-
return
|
5
|
+
return unless header_bytes
|
6
6
|
|
7
7
|
header = parse_id3_v2_header(header_bytes)
|
8
|
-
return
|
9
|
-
return
|
8
|
+
return unless header[:tag] == 'ID3'
|
9
|
+
return unless header[:size] > 0
|
10
10
|
|
11
11
|
header_tag_payload = io.read(header[:size])
|
12
12
|
header_tag_payload = StringIO.new(header_tag_payload)
|
13
13
|
|
14
|
-
return
|
14
|
+
return unless header_tag_payload.size == header[:size]
|
15
15
|
|
16
16
|
frames = []
|
17
17
|
loop do
|
@@ -38,10 +38,10 @@ module FormatParser::MP3Parser::ID3V2
|
|
38
38
|
:flags, :C1,
|
39
39
|
:size, :a4,
|
40
40
|
]
|
41
|
-
keys, values = packspec.partition.with_index {|_, i| i.even? }
|
41
|
+
keys, values = packspec.partition.with_index { |_, i| i.even? }
|
42
42
|
unpacked_values = byte_str.unpack(values.join)
|
43
43
|
header_data = Hash[keys.zip(unpacked_values)]
|
44
|
-
|
44
|
+
|
45
45
|
header_data[:version] = header_data[:version].unpack('C2')
|
46
46
|
header_data[:size] = decode_syncsafe_int(header_data[:size])
|
47
47
|
|
data/lib/parsers/png_parser.rb
CHANGED
@@ -19,9 +19,8 @@ class FormatParser::PNGParser
|
|
19
19
|
6 => true,
|
20
20
|
}
|
21
21
|
|
22
|
-
|
23
22
|
def chunk_length_and_type(io)
|
24
|
-
safe_read(io, 8).unpack(
|
23
|
+
safe_read(io, 8).unpack('Na4')
|
25
24
|
end
|
26
25
|
|
27
26
|
def call(io)
|
@@ -36,7 +35,7 @@ class FormatParser::PNGParser
|
|
36
35
|
# correct length as well.
|
37
36
|
# IHDR _must_ come first, no exceptions. If it doesn't
|
38
37
|
# we should not consider this a valid PNG.
|
39
|
-
return unless chunk_type ==
|
38
|
+
return unless chunk_type == 'IHDR' && chunk_length == 13
|
40
39
|
|
41
40
|
chunk_data = safe_read(io, chunk_length)
|
42
41
|
# Width: 4 bytes
|
@@ -46,8 +45,8 @@ class FormatParser::PNGParser
|
|
46
45
|
# Compression method: 1 byte
|
47
46
|
# Filter method: 1 byte
|
48
47
|
# Interlace method: 1 byte
|
49
|
-
w, h,
|
50
|
-
|
48
|
+
w, h, _bit_depth, color_type, _compression_method,
|
49
|
+
_filter_method, _interlace_method = chunk_data.unpack('N2C5')
|
51
50
|
|
52
51
|
color_mode = COLOR_TYPES.fetch(color_type)
|
53
52
|
has_transparency = TRANSPARENCY_PER_COLOR_TYPE[color_type]
|
@@ -56,18 +55,12 @@ class FormatParser::PNGParser
|
|
56
55
|
# we are dealing with an APNG.
|
57
56
|
safe_skip(io, 4)
|
58
57
|
|
59
|
-
# dry-validation won't let booleans be filled with nil so we have to set
|
60
|
-
# has_animation to false by default
|
61
|
-
has_animation = nil
|
62
|
-
num_frames = nil
|
63
|
-
loop_n_times = nil
|
64
|
-
|
65
58
|
chunk_length, chunk_type = chunk_length_and_type(io)
|
66
59
|
if chunk_length == 8 && chunk_type == 'acTL'
|
67
60
|
# https://wiki.mozilla.org/APNG_Specification#.60acTL.60:_The_Animation_Control_Chunk
|
68
61
|
# Unlike GIF, we do have the frame count that we can recover
|
69
62
|
has_animation = true
|
70
|
-
num_frames,
|
63
|
+
num_frames, _loop_n_times = safe_read(io, 8).unpack('NN')
|
71
64
|
end
|
72
65
|
|
73
66
|
FormatParser::Image.new(
|
data/lib/parsers/psd_parser.rb
CHANGED
@@ -8,13 +8,13 @@ class FormatParser::PSDParser
|
|
8
8
|
|
9
9
|
def call(io)
|
10
10
|
io = FormatParser::IOConstraint.new(io)
|
11
|
-
magic_bytes = safe_read(io, 4).unpack(
|
11
|
+
magic_bytes = safe_read(io, 4).unpack('C4')
|
12
12
|
|
13
13
|
return unless magic_bytes == PSD_HEADER
|
14
14
|
|
15
15
|
# We can be reasonably certain this is a PSD so we grab the height
|
16
16
|
# and width bytes
|
17
|
-
w,h = safe_read(io, 22).unpack(
|
17
|
+
w, h = safe_read(io, 22).unpack('x10N2')
|
18
18
|
FormatParser::Image.new(
|
19
19
|
format: :psd,
|
20
20
|
width_px: w,
|
data/lib/parsers/tiff_parser.rb
CHANGED
@@ -12,30 +12,28 @@ class FormatParser::TIFFParser
|
|
12
12
|
|
13
13
|
def call(io)
|
14
14
|
io = FormatParser::IOConstraint.new(io)
|
15
|
-
magic_bytes = safe_read(io, 4).unpack(
|
15
|
+
magic_bytes = safe_read(io, 4).unpack('C4')
|
16
16
|
endianness = scan_tiff_endianness(magic_bytes)
|
17
17
|
return unless endianness
|
18
18
|
w, h = read_tiff_by_endianness(io, endianness)
|
19
19
|
scanner = FormatParser::EXIFParser.new(:tiff, io)
|
20
20
|
scanner.scan_image_exif
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
21
|
+
FormatParser::Image.new(
|
22
|
+
format: :tif,
|
23
|
+
width_px: w,
|
24
|
+
height_px: h,
|
25
|
+
# might be nil if EXIF metadata wasn't found
|
26
|
+
orientation: scanner.orientation
|
27
|
+
)
|
28
28
|
end
|
29
29
|
|
30
30
|
# TIFFs can be either big or little endian, so we check here
|
31
31
|
# and set our unpack method argument to suit.
|
32
32
|
def scan_tiff_endianness(magic_bytes)
|
33
33
|
if magic_bytes == LITTLE_ENDIAN_TIFF_HEADER_BYTES
|
34
|
-
|
34
|
+
'v'
|
35
35
|
elsif magic_bytes == BIG_ENDIAN_TIFF_HEADER_BYTES
|
36
|
-
|
37
|
-
else
|
38
|
-
nil
|
36
|
+
'n'
|
39
37
|
end
|
40
38
|
end
|
41
39
|
|
data/lib/parsers/wav_parser.rb
CHANGED
@@ -6,10 +6,10 @@ class FormatParser::WAVParser
|
|
6
6
|
formats :wav
|
7
7
|
|
8
8
|
def call(io)
|
9
|
-
# Read the RIFF header. Chunk descriptor should be RIFF, the size should
|
10
|
-
# contain the size of the entire file in bytes minus 8 bytes for the
|
9
|
+
# Read the RIFF header. Chunk descriptor should be RIFF, the size should
|
10
|
+
# contain the size of the entire file in bytes minus 8 bytes for the
|
11
11
|
# two fields not included in this count: chunk_id and size.
|
12
|
-
chunk_id,
|
12
|
+
chunk_id, _size, riff_type = safe_read(io, 12).unpack('a4la4')
|
13
13
|
|
14
14
|
# The chunk_id and riff_type should be RIFF and WAVE respectively
|
15
15
|
return unless chunk_id == 'RIFF' && riff_type == 'WAVE'
|
data/lib/read_limiter.rb
CHANGED
@@ -26,25 +26,21 @@ class FormatParser::ReadLimiter
|
|
26
26
|
def seek(to_offset)
|
27
27
|
@seeks += 1
|
28
28
|
if @max_seeks && @seeks > @max_seeks
|
29
|
-
raise BudgetExceeded,
|
29
|
+
raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks
|
30
30
|
end
|
31
31
|
@io.seek(to_offset)
|
32
32
|
end
|
33
33
|
|
34
|
-
def size
|
35
|
-
@io.size
|
36
|
-
end
|
37
|
-
|
38
34
|
def read(n)
|
39
35
|
@bytes += n
|
40
36
|
@reads += 1
|
41
37
|
|
42
38
|
if @max_bytes && @bytes > @max_bytes
|
43
|
-
raise BudgetExceeded,
|
39
|
+
raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes
|
44
40
|
end
|
45
41
|
|
46
42
|
if @max_reads && @reads > @max_reads
|
47
|
-
raise BudgetExceeded,
|
43
|
+
raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads
|
48
44
|
end
|
49
45
|
|
50
46
|
@io.read(n)
|
data/lib/remote_io.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
class FormatParser::RemoteIO
|
2
|
-
|
3
2
|
# Represents a failure that might be retried
|
4
3
|
# (like a 5xx response or a timeout)
|
5
4
|
class IntermittentFailure < StandardError
|
@@ -33,7 +32,7 @@ class FormatParser::RemoteIO
|
|
33
32
|
#
|
34
33
|
# @return [Integer] the size of the remote resource
|
35
34
|
def size
|
36
|
-
raise
|
35
|
+
raise 'Remote size not yet obtained, need to perform at least one read() to retrieve it' unless @remote_size
|
37
36
|
@remote_size
|
38
37
|
end
|
39
38
|
|
@@ -52,8 +51,6 @@ class FormatParser::RemoteIO
|
|
52
51
|
@remote_size = maybe_size
|
53
52
|
@pos += maybe_body.bytesize
|
54
53
|
maybe_body.force_encoding(Encoding::ASCII_8BIT)
|
55
|
-
else
|
56
|
-
nil
|
57
54
|
end
|
58
55
|
end
|
59
56
|
|
@@ -67,7 +64,7 @@ class FormatParser::RemoteIO
|
|
67
64
|
# We use a GET and not a HEAD request followed by a GET because
|
68
65
|
# S3 does not allow HEAD requests if you only presigned your URL for GETs, so we
|
69
66
|
# combine the first GET of a segment and retrieving the size of the resource
|
70
|
-
response = Faraday.get(@uri, nil, range:
|
67
|
+
response = Faraday.get(@uri, nil, range: 'bytes=%d-%d' % [range.begin, range.end])
|
71
68
|
|
72
69
|
case response.status
|
73
70
|
when 200, 206
|
@@ -90,7 +87,7 @@ class FormatParser::RemoteIO
|
|
90
87
|
# which satisfies the Ruby IO convention. The caller should deal with `nil` being the result of a read()
|
91
88
|
# S3 will also handily _not_ supply us with the Content-Range of the actual resource, so we
|
92
89
|
# cannot hint size with this response - at lease not when working with S3
|
93
|
-
return
|
90
|
+
return
|
94
91
|
when 500..599
|
95
92
|
raise IntermittentFailure, "Server at #{@uri} replied with a #{response.status} and we might want to retry"
|
96
93
|
else
|