bzip2-ffi 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- checksums.yaml.gz.sig +1 -2
- data.tar.gz.sig +0 -0
- data/CHANGES.md +23 -2
- data/Gemfile +33 -2
- data/LICENSE +13 -13
- data/README.md +70 -61
- data/Rakefile +24 -0
- data/bzip2-ffi.gemspec +9 -0
- data/lib/bzip2/ffi.rb +9 -6
- data/lib/bzip2/ffi/error.rb +5 -2
- data/lib/bzip2/ffi/io.rb +59 -47
- data/lib/bzip2/ffi/libbz2.rb +7 -3
- data/lib/bzip2/ffi/reader.rb +204 -104
- data/lib/bzip2/ffi/version.rb +4 -1
- data/lib/bzip2/ffi/writer.rb +77 -62
- data/test/error_test.rb +19 -20
- data/test/fixtures/{bzipped → compressed.bz2} +0 -0
- data/test/fixtures/lorem-4096-bytes-compressed.txt.bz2 +0 -0
- data/test/fixtures/lorem-first-structure-4096-bytes.txt.bz2 +0 -0
- data/test/fixtures/two_structures.bz2 +0 -0
- data/test/io_test.rb +34 -32
- data/test/reader_test.rb +335 -111
- data/test/test_helper.rb +45 -8
- data/test/version_test.rb +4 -1
- data/test/writer_test.rb +95 -73
- metadata +31 -25
- metadata.gz.sig +0 -0
data/lib/bzip2/ffi/libbz2.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
require 'ffi'
|
2
5
|
|
3
6
|
module Bzip2
|
4
7
|
module FFI
|
5
8
|
# FFI bindings for the libbz2 low-level interface.
|
6
9
|
#
|
7
|
-
# See bzlib.h and
|
10
|
+
# See bzlib.h and https://sourceware.org/bzip2/docs.html.
|
8
11
|
#
|
9
12
|
# @private
|
10
13
|
module Libbz2 #:nodoc:
|
@@ -50,7 +53,7 @@ module Bzip2
|
|
50
53
|
|
51
54
|
:bzalloc, :bzalloc,
|
52
55
|
:bzfree, :bzfree,
|
53
|
-
:opaque, :pointer
|
56
|
+
:opaque, :pointer
|
54
57
|
end
|
55
58
|
|
56
59
|
# int BZ2_bzCompressInt(bz_stream* strm, int blockSize100k, int verbosity, int workFactor);
|
@@ -61,7 +64,7 @@ module Bzip2
|
|
61
64
|
|
62
65
|
# int BZ2_bzCompressEnd (bz_stream* strm);
|
63
66
|
attach_function :BZ2_bzCompressEnd, [BzStream.by_ref], :int
|
64
|
-
|
67
|
+
|
65
68
|
# int BZ2_bzDecompressInit (bz_stream *strm, int verbosity, int small);
|
66
69
|
attach_function :BZ2_bzDecompressInit, [BzStream.by_ref, :int, :int], :int
|
67
70
|
|
@@ -71,5 +74,6 @@ module Bzip2
|
|
71
74
|
# int BZ2_bzDecompressEnd (bz_stream *strm);
|
72
75
|
attach_function :BZ2_bzDecompressEnd, [BzStream.by_ref], :int
|
73
76
|
end
|
77
|
+
private_constant :Libbz2
|
74
78
|
end
|
75
79
|
end
|
data/lib/bzip2/ffi/reader.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
require 'pathname'
|
2
5
|
require 'stringio'
|
3
6
|
|
4
7
|
module Bzip2
|
5
8
|
module FFI
|
6
|
-
#
|
7
|
-
# public instance methods of
|
9
|
+
# {Reader} reads and decompresses a bzip2 compressed stream or file. The
|
10
|
+
# public instance methods of {Reader} are intended to be equivalent to those
|
8
11
|
# of a standard `IO` object.
|
9
12
|
#
|
10
13
|
# Data can be read as a stream using {open} and {#read}, for example:
|
@@ -15,7 +18,7 @@ module Bzip2
|
|
15
18
|
# end
|
16
19
|
# end
|
17
20
|
#
|
18
|
-
# Alternatively, without passing a block to
|
21
|
+
# Alternatively, without passing a block to {open}:
|
19
22
|
#
|
20
23
|
# reader = Bzip2::FFI::Reader.open(io_or_path)
|
21
24
|
# begin
|
@@ -26,77 +29,88 @@ module Bzip2
|
|
26
29
|
# reader.close
|
27
30
|
# end
|
28
31
|
#
|
29
|
-
#
|
32
|
+
# All the available bzipped data can be read in a single step using {read}:
|
30
33
|
#
|
31
34
|
# uncompressed = Bzip2::FFI::Reader.read(io_or_path)
|
32
35
|
#
|
33
|
-
# The {open} and {read} methods accept either an
|
34
|
-
# path.
|
36
|
+
# The {open} and {read} methods accept either an IO-like object or a file
|
37
|
+
# path. IO-like objects must have a `#read` method. Paths can be given as
|
35
38
|
# either a `String` or `Pathname`.
|
36
39
|
#
|
37
40
|
# No character conversion is performed on decompressed bytes. The {read} and
|
38
41
|
# {#read} methods return instances of `String` that represent the raw
|
39
|
-
# decompressed bytes, with
|
42
|
+
# decompressed bytes, with `#encoding` set to `Encoding::ASCII_8BIT` (also
|
40
43
|
# known as `Encoding::BINARY`).
|
41
44
|
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
45
|
+
# {Reader} will normally read all consecutive bzip2 compressed structures
|
46
|
+
# from the given stream or file (unless the `:first_only` parameter is
|
47
|
+
# specified - see {open}). If the stream or file contains additional data
|
48
|
+
# beyond the end of the compressed bzip2 data, it may be read during
|
49
|
+
# decompression. If such an overread has occurred and the IO-like object
|
50
|
+
# being read from has a `#seek` method, {Reader} will use it to reposition
|
51
|
+
# the stream to the byte immediately following the end of the compressed
|
52
|
+
# bzip2 data. If `#seek` raises an `IOError`, it will be caught and the
|
53
|
+
# stream position will be left unchanged.
|
50
54
|
class Reader < IO
|
51
55
|
# The number of bytes read from the compressed data stream at a time.
|
52
56
|
#
|
53
57
|
# @private
|
54
58
|
READ_BUFFER_SIZE = 4096 #:nodoc:
|
59
|
+
private_constant :READ_BUFFER_SIZE
|
55
60
|
|
56
61
|
# The number of uncompressed bytes to read at a time when using {#read}
|
57
62
|
# without a length.
|
58
63
|
#
|
59
64
|
# @private
|
60
65
|
DEFAULT_DECOMPRESS_COUNT = 4096 #:nodoc:
|
66
|
+
private_constant :DEFAULT_DECOMPRESS_COUNT
|
61
67
|
|
62
68
|
class << self
|
63
69
|
# Use send to keep this hidden from YARD (visibility tag does not work).
|
64
70
|
send(:public, :new)
|
65
71
|
|
66
|
-
# Opens a {Reader} to read and decompress data from either an
|
67
|
-
# object or a file.
|
72
|
+
# Opens a {Reader} to read and decompress data from either an IO-like
|
73
|
+
# object or a file. IO-like objects must have a `#read` method. Files
|
68
74
|
# can be specified using either a `String` containing the file path or a
|
69
75
|
# `Pathname`.
|
70
76
|
#
|
71
|
-
# If no block is given, the opened
|
77
|
+
# If no block is given, the opened {Reader} instance is returned. After
|
72
78
|
# use, the instance should be closed using the {#close} method.
|
73
79
|
#
|
74
|
-
# If a block is given, it will be passed the opened
|
75
|
-
# as an argument. After the block terminates, the
|
76
|
-
# automatically be closed.
|
80
|
+
# If a block is given, it will be passed the opened {Reader} instance
|
81
|
+
# as an argument. After the block terminates, the {Reader} instance will
|
82
|
+
# automatically be closed. {open} will then return the result of the
|
77
83
|
# block.
|
78
84
|
#
|
79
85
|
# The following options can be specified using the `options` `Hash`:
|
80
86
|
#
|
81
|
-
# * `:autoclose` - When passing an
|
82
|
-
# close
|
87
|
+
# * `:autoclose` - When passing an IO-like object, set to `true` to
|
88
|
+
# close it when the {Reader} instance is closed.
|
89
|
+
# * `:first_only` - Bzip2 files can contain multiple consecutive
|
90
|
+
# compressed structures. Normally all the structures
|
91
|
+
# will be decompressed with the decompressed bytes
|
92
|
+
# concatenated. Set to `true` to only read the first
|
93
|
+
# structure.
|
83
94
|
# * `:small` - Set to `true` to use an alternative decompression
|
84
95
|
# algorithm that uses less memory, but at the cost of
|
85
96
|
# decompressing more slowly (roughly 2,300 kB less memory
|
86
97
|
# at about half the speed).
|
87
98
|
#
|
88
|
-
# If an
|
89
|
-
#
|
90
|
-
#
|
99
|
+
# If an IO-like object that has a `#binmode` method is passed to {open},
|
100
|
+
# `#binmode` will be called on `io_or_path` before yielding to the block
|
101
|
+
# or returning.
|
91
102
|
#
|
92
|
-
# @param io_or_path [Object] Either an
|
103
|
+
# @param io_or_path [Object] Either an IO-like object with a `#read`
|
93
104
|
# method or a file path as a `String` or
|
94
105
|
# `Pathname`.
|
95
106
|
# @param options [Hash] Optional parameters (`:autoclose`, `:first_only` and `:small`).
|
96
|
-
# @
|
107
|
+
# @yield [reader] If a block is given, it is yielded to.
|
108
|
+
# @yieldparam reader [Reader] The new {Reader} instance.
|
109
|
+
# @yieldresult [Object] A result to be returned as the result of {open}.
|
110
|
+
# @return [Object] The opened {Reader} instance if no block is given, or
|
97
111
|
# the result of the block if a block is given.
|
98
112
|
# @raise [ArgumentError] If `io_or_path` is _not_ a `String`, `Pathname`
|
99
|
-
# or an
|
113
|
+
# or an IO-like object with a `#read` method.
|
100
114
|
# @raise [Errno::ENOENT] If the specified file does not exist.
|
101
115
|
# @raise [Error::Bzip2Error] If an error occurs when initializing
|
102
116
|
# libbz2.
|
@@ -113,37 +127,42 @@ module Bzip2
|
|
113
127
|
end
|
114
128
|
|
115
129
|
# Reads and decompresses an entire bzip2 compressed structure from
|
116
|
-
# either an
|
117
|
-
#
|
118
|
-
#
|
119
|
-
#
|
130
|
+
# either an IO-like object or a file and returns the decompressed bytes
|
131
|
+
# as a `String`. IO-like objects must have a `#read` method. Files can
|
132
|
+
# be specified using either a `String` containing the file path or a
|
133
|
+
# `Pathname`.
|
120
134
|
#
|
121
135
|
# The following options can be specified using the `options` `Hash`:
|
122
136
|
#
|
123
|
-
# * `:autoclose` - When passing an
|
124
|
-
# close
|
125
|
-
#
|
137
|
+
# * `:autoclose` - When passing an IO-like object, set to `true` to
|
138
|
+
# close it when the compressed data has been read.
|
139
|
+
# * `:first_only` - Bzip2 files can contain multiple consecutive
|
140
|
+
# compressed structures. Normally all the structures
|
141
|
+
# will be decompressed with the decompressed bytes
|
142
|
+
# concatenated. Set to `true` to only read the first
|
143
|
+
# structure.
|
126
144
|
# * `:small` - Set to `true` to use an alternative decompression
|
127
145
|
# algorithm that uses less memory, but at the cost of
|
128
146
|
# decompressing more slowly (roughly 2,300 kB less memory
|
129
147
|
# at about half the speed).
|
130
148
|
#
|
131
|
-
# No character conversion is performed on decompressed bytes.
|
149
|
+
# No character conversion is performed on decompressed bytes. {read}
|
132
150
|
# returns a `String` that represents the raw decompressed bytes, with
|
133
151
|
# `encoding` set to `Encoding::ASCII_8BIT` (also known as
|
134
152
|
# `Encoding::BINARY`).
|
135
153
|
#
|
136
|
-
# If an
|
137
|
-
#
|
138
|
-
#
|
154
|
+
# If an IO-like object that has a `#binmode` method is passed to {read},
|
155
|
+
# `#binmode` will be called on `io_or_path` before any compressed data
|
156
|
+
# is read.
|
139
157
|
#
|
140
|
-
# @param io_or_path [Object] Either an
|
158
|
+
# @param io_or_path [Object] Either an IO-like object with a `#read`
|
141
159
|
# method or a file path as a `String` or
|
142
160
|
# `Pathname`.
|
143
|
-
# @param options [Hash] Optional parameters (`:autoclose
|
161
|
+
# @param options [Hash] Optional parameters (`:autoclose`, `:first_only`
|
162
|
+
# and `:small`).
|
144
163
|
# @return [String] The decompressed data.
|
145
164
|
# @raise [ArgumentError] If `io_or_path` is _not_ a `String`, `Pathname`
|
146
|
-
# or an
|
165
|
+
# or an IO-like object with a `#read` method.
|
147
166
|
# @raise [Errno::ENOENT] If the specified file does not exist.
|
148
167
|
# @raise [Error::Bzip2Error] If an error occurs when initializing
|
149
168
|
# libbz2 or decompressing data.
|
@@ -155,11 +174,11 @@ module Bzip2
|
|
155
174
|
|
156
175
|
private
|
157
176
|
|
158
|
-
# Returns a Proc that can be used as a finalizer to call
|
159
|
-
#
|
177
|
+
# Returns a `Proc` that can be used as a finalizer to call
|
178
|
+
# {Libbz2::BZ2_bzDecompressEnd} with the given `stream`.
|
160
179
|
#
|
161
180
|
# @param stream [Libbz2::BzStream] The stream that should be passed to
|
162
|
-
#
|
181
|
+
# {Libbz2::BZ2_bzDecompressEnd}.
|
163
182
|
def finalize(stream)
|
164
183
|
->(id) do
|
165
184
|
Libbz2::BZ2_bzDecompressEnd(stream)
|
@@ -167,65 +186,74 @@ module Bzip2
|
|
167
186
|
end
|
168
187
|
end
|
169
188
|
|
170
|
-
# Initializes a {Reader} to read compressed data from an
|
171
|
-
# (`io`). `io` must have a
|
189
|
+
# Initializes a {Reader} to read compressed data from an IO-like object
|
190
|
+
# (`io`). `io` must have a `#read` method.
|
172
191
|
#
|
173
192
|
# The following options can be specified using the `options` `Hash`:
|
174
193
|
#
|
175
|
-
# * `:autoclose` - Set to `true` to close `io` when the
|
194
|
+
# * `:autoclose` - Set to `true` to close `io` when the {Reader} instance
|
176
195
|
# is closed.
|
196
|
+
# * `:first_only` - Bzip2 files can contain multiple consecutive
|
197
|
+
# compressed structures. Normally all the structures will
|
198
|
+
# be decompressed with the decompressed bytes
|
199
|
+
# concatenated. Set to `true` to only read the first
|
200
|
+
# structure.
|
177
201
|
# * `:small` - Set to `true` to use an alternative decompression
|
178
202
|
# algorithm that uses less memory, but at the cost of
|
179
203
|
# decompressing more slowly (roughly 2,300 kB less memory
|
180
204
|
# at about half the speed).
|
181
205
|
#
|
182
|
-
#
|
206
|
+
# `#binmode` is called on `io` if `io` responds to `#binmode`.
|
183
207
|
#
|
184
|
-
# After use, the
|
208
|
+
# After use, the {Reader} instance should be closed using the {#close}
|
185
209
|
# method.
|
186
210
|
#
|
187
|
-
# @param io [Object] An
|
188
|
-
# @param options [Hash] Optional parameters (`:autoclose
|
189
|
-
#
|
211
|
+
# @param io [Object] An IO-like object with a `#read` method.
|
212
|
+
# @param options [Hash] Optional parameters (`:autoclose`, `:first_only`
|
213
|
+
# and `:small`).
|
214
|
+
# @raise [ArgumentError] If `io` is `nil` or does not respond to `#read`.
|
190
215
|
# @raise [Error::Bzip2Error] If an error occurs when initializing libbz2.
|
191
216
|
def initialize(io, options = {})
|
192
217
|
super
|
193
218
|
raise ArgumentError, 'io must respond to read' unless io.respond_to?(:read)
|
194
219
|
|
195
|
-
|
220
|
+
@first_only = options[:first_only]
|
221
|
+
@small = options[:small] ? 1 : 0
|
196
222
|
|
197
223
|
@in_eof = false
|
198
224
|
@out_eof = false
|
199
225
|
@in_buffer = nil
|
226
|
+
@structure_number = 1
|
227
|
+
@structure_start_pos = 0
|
228
|
+
@in_pos = 0
|
229
|
+
@out_pos = 0
|
200
230
|
|
201
|
-
|
202
|
-
|
203
|
-
ObjectSpace.define_finalizer(self, self.class.send(:finalize, stream))
|
231
|
+
decompress_init(stream)
|
204
232
|
end
|
205
233
|
|
206
234
|
# Ends decompression and closes the {Reader}.
|
207
235
|
#
|
208
236
|
# If the {open} method is used with a block, it is not necessary to call
|
209
|
-
#
|
237
|
+
# {#close}. Otherwise, {#close} should be called once the {Reader} is no
|
210
238
|
# longer needed.
|
211
239
|
#
|
212
240
|
# @return [NilType] `nil`.
|
213
|
-
# @raise [IOError] If the
|
241
|
+
# @raise [IOError] If the {Reader} has already been closed.
|
214
242
|
def close
|
215
243
|
s = stream
|
216
244
|
|
217
245
|
unless @out_eof
|
218
246
|
decompress_end(s)
|
219
247
|
end
|
220
|
-
|
248
|
+
|
221
249
|
s[:next_in] = nil
|
222
250
|
s[:next_out] = nil
|
223
|
-
|
251
|
+
|
224
252
|
if @in_buffer
|
225
253
|
@in_buffer.free
|
226
254
|
@in_buffer = nil
|
227
255
|
end
|
228
|
-
|
256
|
+
|
229
257
|
super
|
230
258
|
end
|
231
259
|
|
@@ -240,17 +268,17 @@ module Bzip2
|
|
240
268
|
# A result of `nil` or a `String` with a length less than `length` bytes
|
241
269
|
# indicates that the end of the decompressed data has been reached.
|
242
270
|
#
|
243
|
-
# If `length` is `nil`,
|
271
|
+
# If `length` is `nil`, {#read} reads until the end of the decompressed
|
244
272
|
# data, returning the uncompressed bytes as a `String`.
|
245
273
|
#
|
246
|
-
# If `length` is 0,
|
274
|
+
# If `length` is 0, {#read} returns an empty `String`.
|
247
275
|
#
|
248
276
|
# If the optional `buffer` argument is present, it must reference a
|
249
277
|
# `String` that will receive the decompressed data. `buffer` will
|
250
|
-
# contain only the decompressed data after the call to
|
278
|
+
# contain only the decompressed data after the call to {#read}, even if it
|
251
279
|
# is not empty beforehand.
|
252
280
|
#
|
253
|
-
# No character conversion is performed on decompressed bytes.
|
281
|
+
# No character conversion is performed on decompressed bytes. {#read}
|
254
282
|
# returns a `String` that represents the raw decompressed bytes, with
|
255
283
|
# `encoding` set to `Encoding::ASCII_8BIT` (also known as
|
256
284
|
# `Encoding::BINARY`).
|
@@ -267,7 +295,7 @@ module Bzip2
|
|
267
295
|
# the end of the decompressed data has been reached.
|
268
296
|
# @raise [ArgumentError] If `length` is negative.
|
269
297
|
# @raise [Error::Bzip2Error] If an error occurs during decompression.
|
270
|
-
# @raise [IOError] If the
|
298
|
+
# @raise [IOError] If the {Reader} has been closed.
|
271
299
|
def read(length = nil, buffer = nil)
|
272
300
|
if buffer
|
273
301
|
buffer.clear
|
@@ -279,11 +307,11 @@ module Bzip2
|
|
279
307
|
|
280
308
|
if length == 0
|
281
309
|
check_closed
|
282
|
-
return buffer ||
|
310
|
+
return buffer || String.new
|
283
311
|
end
|
284
312
|
|
285
313
|
decompressed = decompress(length)
|
286
|
-
|
314
|
+
|
287
315
|
return nil unless decompressed
|
288
316
|
buffer ? buffer << decompressed : decompressed
|
289
317
|
else
|
@@ -292,11 +320,11 @@ module Bzip2
|
|
292
320
|
# StringIO#binmode is a no-op, but call in case it is implemented in
|
293
321
|
# future versions.
|
294
322
|
result.binmode
|
295
|
-
|
323
|
+
|
296
324
|
result.set_encoding(Encoding::ASCII_8BIT)
|
297
325
|
|
298
326
|
loop do
|
299
|
-
decompressed = decompress(DEFAULT_DECOMPRESS_COUNT)
|
327
|
+
decompressed = decompress(DEFAULT_DECOMPRESS_COUNT)
|
300
328
|
break unless decompressed
|
301
329
|
result.write(decompressed)
|
302
330
|
break if decompressed.bytesize < DEFAULT_DECOMPRESS_COUNT
|
@@ -306,6 +334,29 @@ module Bzip2
|
|
306
334
|
end
|
307
335
|
end
|
308
336
|
|
337
|
+
# Returns `true` if decompression has completed, otherwise `false`.
|
338
|
+
#
|
339
|
+
# Note that it is possible for `false` to be returned after all the
|
340
|
+
# decompressed data has been read. In such cases, the next call to {#read}
|
341
|
+
# will detect the end of the bzip2 structure and set {#eof?} to `true`.
|
342
|
+
#
|
343
|
+
# @return [Boolean] `true` if decompression has completed, otherwise `false`.
|
344
|
+
# @raise [IOError] If the {Reader} has been closed.
|
345
|
+
def eof?
|
346
|
+
check_closed
|
347
|
+
@out_eof
|
348
|
+
end
|
349
|
+
alias eof eof?
|
350
|
+
|
351
|
+
# Returns the number of decompressed bytes that have been read.
|
352
|
+
#
|
353
|
+
# @return [Integer] The number of decompressed bytes that have been read.
|
354
|
+
# @raise [IOError] If the {Reader} has been closed.
|
355
|
+
def pos
|
356
|
+
check_closed
|
357
|
+
@out_pos
|
358
|
+
end
|
359
|
+
|
309
360
|
private
|
310
361
|
|
311
362
|
# Attempts to decompress and return `count` bytes.
|
@@ -315,9 +366,9 @@ module Bzip2
|
|
315
366
|
# @return [String] The decompressed data as a `String` with ASCII-8BIT
|
316
367
|
# encoding, or `nil` if length was a positive integer and
|
317
368
|
# the end of the decompressed data has been reached.
|
318
|
-
# @raise [ArgumentError]
|
369
|
+
# @raise [ArgumentError] If `count` is not greater than or equal to 1.
|
319
370
|
# @raise [Error::Bzip2Error] If an error occurs during decompression.
|
320
|
-
# @raise [IOError] If the
|
371
|
+
# @raise [IOError] If the {Reader} has been closed.
|
321
372
|
def decompress(count)
|
322
373
|
raise ArgumentError, "count must be a positive integer" unless count >= 1
|
323
374
|
s = stream
|
@@ -335,6 +386,7 @@ module Bzip2
|
|
335
386
|
bytes = io.read(READ_BUFFER_SIZE)
|
336
387
|
|
337
388
|
if bytes && bytes.bytesize > 0
|
389
|
+
@in_pos += bytes.bytesize
|
338
390
|
@in_eof = bytes.bytesize < READ_BUFFER_SIZE
|
339
391
|
@in_buffer = ::FFI::MemoryPointer.new(1, bytes.bytesize)
|
340
392
|
@in_buffer.write_bytes(bytes)
|
@@ -345,8 +397,15 @@ module Bzip2
|
|
345
397
|
end
|
346
398
|
end
|
347
399
|
|
400
|
+
# Reached the end of input without reading anything in the current
|
401
|
+
# bzip2 structure. No more data to process.
|
402
|
+
if @in_pos == @structure_start_pos
|
403
|
+
@out_eof = true
|
404
|
+
break
|
405
|
+
end
|
406
|
+
|
348
407
|
prev_avail_out = s[:avail_out]
|
349
|
-
|
408
|
+
|
350
409
|
res = Libbz2::BZ2_bzDecompress(s)
|
351
410
|
|
352
411
|
if s[:avail_in] == 0 && @in_buffer
|
@@ -355,43 +414,51 @@ module Bzip2
|
|
355
414
|
@in_buffer = nil
|
356
415
|
end
|
357
416
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
# The input could contain data after the end of the bzip2 stream.
|
362
|
-
#
|
363
|
-
# s[:avail_in] will contain the number of bytes that have been
|
364
|
-
# read from io, but not been consumed by BZ2_bzDecompress.
|
417
|
+
if @structure_number > 1 && res == Libbz2::BZ_DATA_ERROR_MAGIC
|
418
|
+
# Found something other than the bzip2 magic bytes after the end
|
419
|
+
# of a bzip2 structure.
|
365
420
|
#
|
366
421
|
# Attempt to move the input stream back by the amount that has
|
367
422
|
# been over-read.
|
368
|
-
|
369
|
-
io.seek(-s[:avail_in], ::IO::SEEK_CUR) rescue IOError
|
370
|
-
end
|
371
|
-
|
372
|
-
if @in_buffer
|
373
|
-
s[:next_in] = nil
|
374
|
-
@in_buffer.free
|
375
|
-
@in_buffer = nil
|
376
|
-
end
|
377
|
-
|
423
|
+
attempt_seek_to_structure_start
|
378
424
|
decompress_end(s)
|
379
|
-
|
380
425
|
@out_eof = true
|
381
426
|
break
|
382
427
|
end
|
383
428
|
|
384
|
-
|
429
|
+
check_error(res)
|
430
|
+
|
431
|
+
if res == Libbz2::BZ_STREAM_END
|
432
|
+
decompress_end(s)
|
385
433
|
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
434
|
+
if (s[:avail_in] > 0 || !@in_eof) && !@first_only
|
435
|
+
# Re-initialize to read a second bzip2 structure if there is
|
436
|
+
# still input available and not restricting to the first stream.
|
437
|
+
@structure_number += 1
|
438
|
+
@structure_start_pos = @in_pos - s[:avail_in]
|
439
|
+
decompress_init(s)
|
440
|
+
else
|
441
|
+
# May have already read data after the end of the first bzip2
|
442
|
+
# structure.
|
443
|
+
attempt_seek_to_structure_start if @first_only
|
444
|
+
@out_eof = true
|
445
|
+
break
|
446
|
+
end
|
447
|
+
else
|
448
|
+
# No more input available and calling BZ2_bzDecompress didn't
|
449
|
+
# advance the output. Raise an error.
|
450
|
+
if @in_eof && s[:avail_in] == 0 && prev_avail_out == s[:avail_out]
|
451
|
+
decompress_end(s)
|
452
|
+
@out_eof = true
|
453
|
+
raise Error::UnexpectedEofError.new
|
454
|
+
end
|
390
455
|
end
|
456
|
+
|
457
|
+
break if s[:avail_out] == 0
|
391
458
|
end
|
392
459
|
|
393
460
|
result = out_buffer.read_bytes(out_buffer.size - s[:avail_out])
|
394
|
-
ensure
|
461
|
+
ensure
|
395
462
|
out_buffer.free
|
396
463
|
s[:next_out] = nil
|
397
464
|
s[:avail_out] = 0
|
@@ -400,18 +467,51 @@ module Bzip2
|
|
400
467
|
if @out_eof && result.bytesize == 0
|
401
468
|
nil
|
402
469
|
else
|
470
|
+
@out_pos += result.bytesize
|
403
471
|
result
|
404
|
-
end
|
472
|
+
end
|
473
|
+
end
|
474
|
+
|
475
|
+
# Attempts to reposition the compressed stream to the start of the current
|
476
|
+
# structure. Used when {Libbz2::BZ2_bzDecompress} has read beyond the end
|
477
|
+
# of a bzip2 structure.
|
478
|
+
def attempt_seek_to_structure_start
|
479
|
+
if io.respond_to?(:seek)
|
480
|
+
diff = @structure_start_pos - @in_pos
|
481
|
+
if diff < 0
|
482
|
+
begin
|
483
|
+
io.seek(diff, ::IO::SEEK_CUR)
|
484
|
+
@in_pos += diff
|
485
|
+
rescue IOError
|
486
|
+
end
|
487
|
+
end
|
488
|
+
end
|
489
|
+
end
|
490
|
+
|
491
|
+
# Calls {Libbz2::BZ2_bzDecompressInit} to initialize the decompression
|
492
|
+
# stream `s`.
|
493
|
+
#
|
494
|
+
# Defines a finalizer to ensure that the memory associated with the stream
|
495
|
+
# is deallocated.
|
496
|
+
#
|
497
|
+
# @param s [Libbz2::BzStream] The stream to initialize decompression for.
|
498
|
+
# @raise [Error::Bzip2Error] If {Libbz2::BZ2_bzDecompressInit} reports an
|
499
|
+
# error.
|
500
|
+
def decompress_init(s)
|
501
|
+
check_error(Libbz2::BZ2_bzDecompressInit(s, 0, @small))
|
502
|
+
|
503
|
+
ObjectSpace.define_finalizer(self, self.class.send(:finalize, s))
|
405
504
|
end
|
406
505
|
|
407
|
-
# Calls BZ2_bzDecompressEnd to release
|
408
|
-
# decompression stream `s`.
|
506
|
+
# Calls {Libbz2::BZ2_bzDecompressEnd} to release memory associated with
|
507
|
+
# the decompression stream `s`.
|
409
508
|
#
|
410
509
|
# Notifies `ObjectSpace` that it is no longer necessary to finalize the
|
411
|
-
#
|
510
|
+
# {Reader} instance.
|
412
511
|
#
|
413
512
|
# @param s [Libbz2::BzStream] The stream to end decompression for.
|
414
|
-
# @raise [Error::Bzip2Error] If
|
513
|
+
# @raise [Error::Bzip2Error] If {Libbz2::BZ2_bzDecompressEnd} reports an
|
514
|
+
# error.
|
415
515
|
def decompress_end(s)
|
416
516
|
res = Libbz2::BZ2_bzDecompressEnd(s)
|
417
517
|
ObjectSpace.undefine_finalizer(self)
|