bzip2-ffi 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- checksums.yaml.gz.sig +1 -2
- data.tar.gz.sig +0 -0
- data/CHANGES.md +23 -2
- data/Gemfile +33 -2
- data/LICENSE +13 -13
- data/README.md +70 -61
- data/Rakefile +24 -0
- data/bzip2-ffi.gemspec +9 -0
- data/lib/bzip2/ffi.rb +9 -6
- data/lib/bzip2/ffi/error.rb +5 -2
- data/lib/bzip2/ffi/io.rb +59 -47
- data/lib/bzip2/ffi/libbz2.rb +7 -3
- data/lib/bzip2/ffi/reader.rb +204 -104
- data/lib/bzip2/ffi/version.rb +4 -1
- data/lib/bzip2/ffi/writer.rb +77 -62
- data/test/error_test.rb +19 -20
- data/test/fixtures/{bzipped → compressed.bz2} +0 -0
- data/test/fixtures/lorem-4096-bytes-compressed.txt.bz2 +0 -0
- data/test/fixtures/lorem-first-structure-4096-bytes.txt.bz2 +0 -0
- data/test/fixtures/two_structures.bz2 +0 -0
- data/test/io_test.rb +34 -32
- data/test/reader_test.rb +335 -111
- data/test/test_helper.rb +45 -8
- data/test/version_test.rb +4 -1
- data/test/writer_test.rb +95 -73
- metadata +31 -25
- metadata.gz.sig +0 -0
data/lib/bzip2/ffi/libbz2.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
require 'ffi'
|
2
5
|
|
3
6
|
module Bzip2
|
4
7
|
module FFI
|
5
8
|
# FFI bindings for the libbz2 low-level interface.
|
6
9
|
#
|
7
|
-
# See bzlib.h and
|
10
|
+
# See bzlib.h and https://sourceware.org/bzip2/docs.html.
|
8
11
|
#
|
9
12
|
# @private
|
10
13
|
module Libbz2 #:nodoc:
|
@@ -50,7 +53,7 @@ module Bzip2
|
|
50
53
|
|
51
54
|
:bzalloc, :bzalloc,
|
52
55
|
:bzfree, :bzfree,
|
53
|
-
:opaque, :pointer
|
56
|
+
:opaque, :pointer
|
54
57
|
end
|
55
58
|
|
56
59
|
# int BZ2_bzCompressInit(bz_stream* strm, int blockSize100k, int verbosity, int workFactor);
|
@@ -61,7 +64,7 @@ module Bzip2
|
|
61
64
|
|
62
65
|
# int BZ2_bzCompressEnd (bz_stream* strm);
|
63
66
|
attach_function :BZ2_bzCompressEnd, [BzStream.by_ref], :int
|
64
|
-
|
67
|
+
|
65
68
|
# int BZ2_bzDecompressInit (bz_stream *strm, int verbosity, int small);
|
66
69
|
attach_function :BZ2_bzDecompressInit, [BzStream.by_ref, :int, :int], :int
|
67
70
|
|
@@ -71,5 +74,6 @@ module Bzip2
|
|
71
74
|
# int BZ2_bzDecompressEnd (bz_stream *strm);
|
72
75
|
attach_function :BZ2_bzDecompressEnd, [BzStream.by_ref], :int
|
73
76
|
end
|
77
|
+
private_constant :Libbz2
|
74
78
|
end
|
75
79
|
end
|
data/lib/bzip2/ffi/reader.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
require 'pathname'
|
2
5
|
require 'stringio'
|
3
6
|
|
4
7
|
module Bzip2
|
5
8
|
module FFI
|
6
|
-
#
|
7
|
-
# public instance methods of
|
9
|
+
# {Reader} reads and decompresses a bzip2 compressed stream or file. The
|
10
|
+
# public instance methods of {Reader} are intended to be equivalent to those
|
8
11
|
# of a standard `IO` object.
|
9
12
|
#
|
10
13
|
# Data can be read as a stream using {open} and {#read}, for example:
|
@@ -15,7 +18,7 @@ module Bzip2
|
|
15
18
|
# end
|
16
19
|
# end
|
17
20
|
#
|
18
|
-
# Alternatively, without passing a block to
|
21
|
+
# Alternatively, without passing a block to {open}:
|
19
22
|
#
|
20
23
|
# reader = Bzip2::FFI::Reader.open(io_or_path)
|
21
24
|
# begin
|
@@ -26,77 +29,88 @@ module Bzip2
|
|
26
29
|
# reader.close
|
27
30
|
# end
|
28
31
|
#
|
29
|
-
#
|
32
|
+
# All the available bzipped data can be read in a single step using {read}:
|
30
33
|
#
|
31
34
|
# uncompressed = Bzip2::FFI::Reader.read(io_or_path)
|
32
35
|
#
|
33
|
-
# The {open} and {read} methods accept either an
|
34
|
-
# path.
|
36
|
+
# The {open} and {read} methods accept either an IO-like object or a file
|
37
|
+
# path. IO-like objects must have a `#read` method. Paths can be given as
|
35
38
|
# either a `String` or `Pathname`.
|
36
39
|
#
|
37
40
|
# No character conversion is performed on decompressed bytes. The {read} and
|
38
41
|
# {#read} methods return instances of `String` that represent the raw
|
39
|
-
# decompressed bytes, with
|
42
|
+
# decompressed bytes, with `#encoding` set to `Encoding::ASCII_8BIT` (also
|
40
43
|
# known as `Encoding::BINARY`).
|
41
44
|
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
45
|
+
# {Reader} will normally read all consecutive bzip2 compressed structures
|
46
|
+
# from the given stream or file (unless the `:first_only` parameter is
|
47
|
+
# specified - see {open}). If the stream or file contains additional data
|
48
|
+
# beyond the end of the compressed bzip2 data, it may be read during
|
49
|
+
# decompression. If such an overread has occurred and the IO-like object
|
50
|
+
# being read from has a `#seek` method, {Reader} will use it to reposition
|
51
|
+
# the stream to the byte immediately following the end of the compressed
|
52
|
+
# bzip2 data. If `#seek` raises an `IOError`, it will be caught and the
|
53
|
+
# stream position will be left unchanged.
|
50
54
|
class Reader < IO
|
51
55
|
# The number of bytes read from the compressed data stream at a time.
|
52
56
|
#
|
53
57
|
# @private
|
54
58
|
READ_BUFFER_SIZE = 4096 #:nodoc:
|
59
|
+
private_constant :READ_BUFFER_SIZE
|
55
60
|
|
56
61
|
# The number of uncompressed bytes to read at a time when using {#read}
|
57
62
|
# without a length.
|
58
63
|
#
|
59
64
|
# @private
|
60
65
|
DEFAULT_DECOMPRESS_COUNT = 4096 #:nodoc:
|
66
|
+
private_constant :DEFAULT_DECOMPRESS_COUNT
|
61
67
|
|
62
68
|
class << self
|
63
69
|
# Use send to keep this hidden from YARD (visibility tag does not work).
|
64
70
|
send(:public, :new)
|
65
71
|
|
66
|
-
# Opens a {Reader} to read and decompress data from either an
|
67
|
-
# object or a file.
|
72
|
+
# Opens a {Reader} to read and decompress data from either an IO-like
|
73
|
+
# object or a file. IO-like objects must have a `#read` method. Files
|
68
74
|
# can be specified using either a `String` containing the file path or a
|
69
75
|
# `Pathname`.
|
70
76
|
#
|
71
|
-
# If no block is given, the opened
|
77
|
+
# If no block is given, the opened {Reader} instance is returned. After
|
72
78
|
# use, the instance should be closed using the {#close} method.
|
73
79
|
#
|
74
|
-
# If a block is given, it will be passed the opened
|
75
|
-
# as an argument. After the block terminates, the
|
76
|
-
# automatically be closed.
|
80
|
+
# If a block is given, it will be passed the opened {Reader} instance
|
81
|
+
# as an argument. After the block terminates, the {Reader} instance will
|
82
|
+
# automatically be closed. {open} will then return the result of the
|
77
83
|
# block.
|
78
84
|
#
|
79
85
|
# The following options can be specified using the `options` `Hash`:
|
80
86
|
#
|
81
|
-
# * `:autoclose` - When passing an
|
82
|
-
# close
|
87
|
+
# * `:autoclose` - When passing an IO-like object, set to `true` to
|
88
|
+
# close it when the {Reader} instance is closed.
|
89
|
+
# * `:first_only` - Bzip2 files can contain multiple consecutive
|
90
|
+
# compressed structures. Normally all the structures
|
91
|
+
# will be decompressed with the decompressed bytes
|
92
|
+
# concatenated. Set to `true` to only read the first
|
93
|
+
# structure.
|
83
94
|
# * `:small` - Set to `true` to use an alternative decompression
|
84
95
|
# algorithm that uses less memory, but at the cost of
|
85
96
|
# decompressing more slowly (roughly 2,300 kB less memory
|
86
97
|
# at about half the speed).
|
87
98
|
#
|
88
|
-
# If an
|
89
|
-
#
|
90
|
-
#
|
99
|
+
# If an IO-like object that has a `#binmode` method is passed to {open},
|
100
|
+
# `#binmode` will be called on `io_or_path` before yielding to the block
|
101
|
+
# or returning.
|
91
102
|
#
|
92
|
-
# @param io_or_path [Object] Either an
|
103
|
+
# @param io_or_path [Object] Either an IO-like object with a `#read`
|
93
104
|
# method or a file path as a `String` or
|
94
105
|
# `Pathname`.
|
95
106
|
# @param options [Hash] Optional parameters (`:autoclose`, `:first_only` and `:small`).
|
96
|
-
# @
|
107
|
+
# @yield [reader] If a block is given, it is yielded to.
|
108
|
+
# @yieldparam reader [Reader] The new {Reader} instance.
|
109
|
+
# @yieldresult [Object] A result to be returned as the result of {open}.
|
110
|
+
# @return [Object] The opened {Reader} instance if no block is given, or
|
97
111
|
# the result of the block if a block is given.
|
98
112
|
# @raise [ArgumentError] If `io_or_path` is _not_ a `String`, `Pathname`
|
99
|
-
# or an
|
113
|
+
# or an IO-like object with a `#read` method.
|
100
114
|
# @raise [Errno::ENOENT] If the specified file does not exist.
|
101
115
|
# @raise [Error::Bzip2Error] If an error occurs when initializing
|
102
116
|
# libbz2.
|
@@ -113,37 +127,42 @@ module Bzip2
|
|
113
127
|
end
|
114
128
|
|
115
129
|
# Reads and decompresses an entire bzip2 compressed structure from
|
116
|
-
# either an
|
117
|
-
#
|
118
|
-
#
|
119
|
-
#
|
130
|
+
# either an IO-like object or a file and returns the decompressed bytes
|
131
|
+
# as a `String`. IO-like objects must have a `#read` method. Files can
|
132
|
+
# be specified using either a `String` containing the file path or a
|
133
|
+
# `Pathname`.
|
120
134
|
#
|
121
135
|
# The following options can be specified using the `options` `Hash`:
|
122
136
|
#
|
123
|
-
# * `:autoclose` - When passing an
|
124
|
-
# close
|
125
|
-
#
|
137
|
+
# * `:autoclose` - When passing an IO-like object, set to `true` to
|
138
|
+
# close it when the compressed data has been read.
|
139
|
+
# * `:first_only` - Bzip2 files can contain multiple consecutive
|
140
|
+
# compressed structures. Normally all the structures
|
141
|
+
# will be decompressed with the decompressed bytes
|
142
|
+
# concatenated. Set to `true` to only read the first
|
143
|
+
# structure.
|
126
144
|
# * `:small` - Set to `true` to use an alternative decompression
|
127
145
|
# algorithm that uses less memory, but at the cost of
|
128
146
|
# decompressing more slowly (roughly 2,300 kB less memory
|
129
147
|
# at about half the speed).
|
130
148
|
#
|
131
|
-
# No character conversion is performed on decompressed bytes.
|
149
|
+
# No character conversion is performed on decompressed bytes. {read}
|
132
150
|
# returns a `String` that represents the raw decompressed bytes, with
|
133
151
|
# `encoding` set to `Encoding::ASCII_8BIT` (also known as
|
134
152
|
# `Encoding::BINARY`).
|
135
153
|
#
|
136
|
-
# If an
|
137
|
-
#
|
138
|
-
#
|
154
|
+
# If an IO-like object that has a `#binmode` method is passed to {read},
|
155
|
+
# `#binmode` will be called on `io_or_path` before any compressed data
|
156
|
+
# is read.
|
139
157
|
#
|
140
|
-
# @param io_or_path [Object] Either an
|
158
|
+
# @param io_or_path [Object] Either an IO-like object with a `#read`
|
141
159
|
# method or a file path as a `String` or
|
142
160
|
# `Pathname`.
|
143
|
-
# @param options [Hash] Optional parameters (`:autoclose
|
161
|
+
# @param options [Hash] Optional parameters (`:autoclose`, `:first_only`
|
162
|
+
# and `:small`).
|
144
163
|
# @return [String] The decompressed data.
|
145
164
|
# @raise [ArgumentError] If `io_or_path` is _not_ a `String`, `Pathname`
|
146
|
-
# or an
|
165
|
+
# or an IO-like object with a `#read` method.
|
147
166
|
# @raise [Errno::ENOENT] If the specified file does not exist.
|
148
167
|
# @raise [Error::Bzip2Error] If an error occurs when initializing
|
149
168
|
# libbz2 or decompressing data.
|
@@ -155,11 +174,11 @@ module Bzip2
|
|
155
174
|
|
156
175
|
private
|
157
176
|
|
158
|
-
# Returns a Proc that can be used as a finalizer to call
|
159
|
-
#
|
177
|
+
# Returns a `Proc` that can be used as a finalizer to call
|
178
|
+
# {Libbz2::BZ2_bzDecompressEnd} with the given `stream`.
|
160
179
|
#
|
161
180
|
# @param stream [Libbz2::BzStream] The stream that should be passed to
|
162
|
-
#
|
181
|
+
# {Libbz2::BZ2_bzDecompressEnd}.
|
163
182
|
def finalize(stream)
|
164
183
|
->(id) do
|
165
184
|
Libbz2::BZ2_bzDecompressEnd(stream)
|
@@ -167,65 +186,74 @@ module Bzip2
|
|
167
186
|
end
|
168
187
|
end
|
169
188
|
|
170
|
-
# Initializes a {Reader} to read compressed data from an
|
171
|
-
# (`io`). `io` must have a
|
189
|
+
# Initializes a {Reader} to read compressed data from an IO-like object
|
190
|
+
# (`io`). `io` must have a `#read` method.
|
172
191
|
#
|
173
192
|
# The following options can be specified using the `options` `Hash`:
|
174
193
|
#
|
175
|
-
# * `:autoclose` - Set to `true` to close `io` when the
|
194
|
+
# * `:autoclose` - Set to `true` to close `io` when the {Reader} instance
|
176
195
|
# is closed.
|
196
|
+
# * `:first_only` - Bzip2 files can contain multiple consecutive
|
197
|
+
# compressed structures. Normally all the structures will
|
198
|
+
# be decompressed with the decompressed bytes
|
199
|
+
# concatenated. Set to `true` to only read the first
|
200
|
+
# structure.
|
177
201
|
# * `:small` - Set to `true` to use an alternative decompression
|
178
202
|
# algorithm that uses less memory, but at the cost of
|
179
203
|
# decompressing more slowly (roughly 2,300 kB less memory
|
180
204
|
# at about half the speed).
|
181
205
|
#
|
182
|
-
#
|
206
|
+
# `#binmode` is called on `io` if `io` responds to `#binmode`.
|
183
207
|
#
|
184
|
-
# After use, the
|
208
|
+
# After use, the {Reader} instance should be closed using the {#close}
|
185
209
|
# method.
|
186
210
|
#
|
187
|
-
# @param io [Object] An
|
188
|
-
# @param options [Hash] Optional parameters (`:autoclose
|
189
|
-
#
|
211
|
+
# @param io [Object] An IO-like object with a `#read` method.
|
212
|
+
# @param options [Hash] Optional parameters (`:autoclose`, `:first_only`
|
213
|
+
# and `:small`).
|
214
|
+
# @raise [ArgumentError] If `io` is `nil` or does not respond to `#read`.
|
190
215
|
# @raise [Error::Bzip2Error] If an error occurs when initializing libbz2.
|
191
216
|
def initialize(io, options = {})
|
192
217
|
super
|
193
218
|
raise ArgumentError, 'io must respond to read' unless io.respond_to?(:read)
|
194
219
|
|
195
|
-
|
220
|
+
@first_only = options[:first_only]
|
221
|
+
@small = options[:small] ? 1 : 0
|
196
222
|
|
197
223
|
@in_eof = false
|
198
224
|
@out_eof = false
|
199
225
|
@in_buffer = nil
|
226
|
+
@structure_number = 1
|
227
|
+
@structure_start_pos = 0
|
228
|
+
@in_pos = 0
|
229
|
+
@out_pos = 0
|
200
230
|
|
201
|
-
|
202
|
-
|
203
|
-
ObjectSpace.define_finalizer(self, self.class.send(:finalize, stream))
|
231
|
+
decompress_init(stream)
|
204
232
|
end
|
205
233
|
|
206
234
|
# Ends decompression and closes the {Reader}.
|
207
235
|
#
|
208
236
|
# If the {open} method is used with a block, it is not necessary to call
|
209
|
-
#
|
237
|
+
# {#close}. Otherwise, {#close} should be called once the {Reader} is no
|
210
238
|
# longer needed.
|
211
239
|
#
|
212
240
|
# @return [NilType] `nil`.
|
213
|
-
# @raise [IOError] If the
|
241
|
+
# @raise [IOError] If the {Reader} has already been closed.
|
214
242
|
def close
|
215
243
|
s = stream
|
216
244
|
|
217
245
|
unless @out_eof
|
218
246
|
decompress_end(s)
|
219
247
|
end
|
220
|
-
|
248
|
+
|
221
249
|
s[:next_in] = nil
|
222
250
|
s[:next_out] = nil
|
223
|
-
|
251
|
+
|
224
252
|
if @in_buffer
|
225
253
|
@in_buffer.free
|
226
254
|
@in_buffer = nil
|
227
255
|
end
|
228
|
-
|
256
|
+
|
229
257
|
super
|
230
258
|
end
|
231
259
|
|
@@ -240,17 +268,17 @@ module Bzip2
|
|
240
268
|
# A result of `nil` or a `String` with a length less than `length` bytes
|
241
269
|
# indicates that the end of the decompressed data has been reached.
|
242
270
|
#
|
243
|
-
# If `length` is `nil`,
|
271
|
+
# If `length` is `nil`, {#read} reads until the end of the decompressed
|
244
272
|
# data, returning the uncompressed bytes as a `String`.
|
245
273
|
#
|
246
|
-
# If `length` is 0,
|
274
|
+
# If `length` is 0, {#read} returns an empty `String`.
|
247
275
|
#
|
248
276
|
# If the optional `buffer` argument is present, it must reference a
|
249
277
|
# `String` that will receive the decompressed data. `buffer` will
|
250
|
-
# contain only the decompressed data after the call to
|
278
|
+
# contain only the decompressed data after the call to {#read}, even if it
|
251
279
|
# is not empty beforehand.
|
252
280
|
#
|
253
|
-
# No character conversion is performed on decompressed bytes.
|
281
|
+
# No character conversion is performed on decompressed bytes. {#read}
|
254
282
|
# returns a `String` that represents the raw decompressed bytes, with
|
255
283
|
# `encoding` set to `Encoding::ASCII_8BIT` (also known as
|
256
284
|
# `Encoding::BINARY`).
|
@@ -267,7 +295,7 @@ module Bzip2
|
|
267
295
|
# the end of the decompressed data has been reached.
|
268
296
|
# @raise [ArgumentError] If `length` is negative.
|
269
297
|
# @raise [Error::Bzip2Error] If an error occurs during decompression.
|
270
|
-
# @raise [IOError] If the
|
298
|
+
# @raise [IOError] If the {Reader} has been closed.
|
271
299
|
def read(length = nil, buffer = nil)
|
272
300
|
if buffer
|
273
301
|
buffer.clear
|
@@ -279,11 +307,11 @@ module Bzip2
|
|
279
307
|
|
280
308
|
if length == 0
|
281
309
|
check_closed
|
282
|
-
return buffer ||
|
310
|
+
return buffer || String.new
|
283
311
|
end
|
284
312
|
|
285
313
|
decompressed = decompress(length)
|
286
|
-
|
314
|
+
|
287
315
|
return nil unless decompressed
|
288
316
|
buffer ? buffer << decompressed : decompressed
|
289
317
|
else
|
@@ -292,11 +320,11 @@ module Bzip2
|
|
292
320
|
# StringIO#binmode is a no-op, but call in case it is implemented in
|
293
321
|
# future versions.
|
294
322
|
result.binmode
|
295
|
-
|
323
|
+
|
296
324
|
result.set_encoding(Encoding::ASCII_8BIT)
|
297
325
|
|
298
326
|
loop do
|
299
|
-
decompressed = decompress(DEFAULT_DECOMPRESS_COUNT)
|
327
|
+
decompressed = decompress(DEFAULT_DECOMPRESS_COUNT)
|
300
328
|
break unless decompressed
|
301
329
|
result.write(decompressed)
|
302
330
|
break if decompressed.bytesize < DEFAULT_DECOMPRESS_COUNT
|
@@ -306,6 +334,29 @@ module Bzip2
|
|
306
334
|
end
|
307
335
|
end
|
308
336
|
|
337
|
+
# Returns `true` if decompression has completed, otherwise `false`.
|
338
|
+
#
|
339
|
+
# Note that it is possible for `false` to be returned after all the
|
340
|
+
# decompressed data has been read. In such cases, the next call to {#read}
|
341
|
+
# will detect the end of the bzip2 structure and set {#eof?} to `true`.
|
342
|
+
#
|
343
|
+
# @return [Boolean] `true` if decompression has completed, otherwise `false`.
|
344
|
+
# @raise [IOError] If the {Reader} has been closed.
|
345
|
+
def eof?
|
346
|
+
check_closed
|
347
|
+
@out_eof
|
348
|
+
end
|
349
|
+
alias eof eof?
|
350
|
+
|
351
|
+
# Returns the number of decompressed bytes that have been read.
|
352
|
+
#
|
353
|
+
# @return [Integer] The number of decompressed bytes that have been read.
|
354
|
+
# @raise [IOError] If the {Reader} has been closed.
|
355
|
+
def pos
|
356
|
+
check_closed
|
357
|
+
@out_pos
|
358
|
+
end
|
359
|
+
|
309
360
|
private
|
310
361
|
|
311
362
|
# Attempts to decompress and return `count` bytes.
|
@@ -315,9 +366,9 @@ module Bzip2
|
|
315
366
|
# @return [String] The decompressed data as a `String` with ASCII-8BIT
|
316
367
|
# encoding, or `nil` if length was a positive integer and
|
317
368
|
# the end of the decompressed data has been reached.
|
318
|
-
# @raise [ArgumentError]
|
369
|
+
# @raise [ArgumentError] If `count` is not greater than or equal to 1.
|
319
370
|
# @raise [Error::Bzip2Error] If an error occurs during decompression.
|
320
|
-
# @raise [IOError] If the
|
371
|
+
# @raise [IOError] If the {Reader} has been closed.
|
321
372
|
def decompress(count)
|
322
373
|
raise ArgumentError, "count must be a positive integer" unless count >= 1
|
323
374
|
s = stream
|
@@ -335,6 +386,7 @@ module Bzip2
|
|
335
386
|
bytes = io.read(READ_BUFFER_SIZE)
|
336
387
|
|
337
388
|
if bytes && bytes.bytesize > 0
|
389
|
+
@in_pos += bytes.bytesize
|
338
390
|
@in_eof = bytes.bytesize < READ_BUFFER_SIZE
|
339
391
|
@in_buffer = ::FFI::MemoryPointer.new(1, bytes.bytesize)
|
340
392
|
@in_buffer.write_bytes(bytes)
|
@@ -345,8 +397,15 @@ module Bzip2
|
|
345
397
|
end
|
346
398
|
end
|
347
399
|
|
400
|
+
# Reached the end of input without reading anything in the current
|
401
|
+
# bzip2 structure. No more data to process.
|
402
|
+
if @in_pos == @structure_start_pos
|
403
|
+
@out_eof = true
|
404
|
+
break
|
405
|
+
end
|
406
|
+
|
348
407
|
prev_avail_out = s[:avail_out]
|
349
|
-
|
408
|
+
|
350
409
|
res = Libbz2::BZ2_bzDecompress(s)
|
351
410
|
|
352
411
|
if s[:avail_in] == 0 && @in_buffer
|
@@ -355,43 +414,51 @@ module Bzip2
|
|
355
414
|
@in_buffer = nil
|
356
415
|
end
|
357
416
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
# The input could contain data after the end of the bzip2 stream.
|
362
|
-
#
|
363
|
-
# s[:avail_in] will contain the number of bytes that have been
|
364
|
-
# read from io, but not been consumed by BZ2_bzDecompress.
|
417
|
+
if @structure_number > 1 && res == Libbz2::BZ_DATA_ERROR_MAGIC
|
418
|
+
# Found something other than the bzip2 magic bytes after the end
|
419
|
+
# of a bzip2 structure.
|
365
420
|
#
|
366
421
|
# Attempt to move the input stream back by the amount that has
|
367
422
|
# been over-read.
|
368
|
-
|
369
|
-
io.seek(-s[:avail_in], ::IO::SEEK_CUR) rescue IOError
|
370
|
-
end
|
371
|
-
|
372
|
-
if @in_buffer
|
373
|
-
s[:next_in] = nil
|
374
|
-
@in_buffer.free
|
375
|
-
@in_buffer = nil
|
376
|
-
end
|
377
|
-
|
423
|
+
attempt_seek_to_structure_start
|
378
424
|
decompress_end(s)
|
379
|
-
|
380
425
|
@out_eof = true
|
381
426
|
break
|
382
427
|
end
|
383
428
|
|
384
|
-
|
429
|
+
check_error(res)
|
430
|
+
|
431
|
+
if res == Libbz2::BZ_STREAM_END
|
432
|
+
decompress_end(s)
|
385
433
|
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
434
|
+
if (s[:avail_in] > 0 || !@in_eof) && !@first_only
|
435
|
+
# Re-initialize to read a second bzip2 structure if there is
|
436
|
+
# still input available and not restricting to the first stream.
|
437
|
+
@structure_number += 1
|
438
|
+
@structure_start_pos = @in_pos - s[:avail_in]
|
439
|
+
decompress_init(s)
|
440
|
+
else
|
441
|
+
# May have already read data after the end of the first bzip2
|
442
|
+
# structure.
|
443
|
+
attempt_seek_to_structure_start if @first_only
|
444
|
+
@out_eof = true
|
445
|
+
break
|
446
|
+
end
|
447
|
+
else
|
448
|
+
# No more input available and calling BZ2_bzDecompress didn't
|
449
|
+
# advance the output. Raise an error.
|
450
|
+
if @in_eof && s[:avail_in] == 0 && prev_avail_out == s[:avail_out]
|
451
|
+
decompress_end(s)
|
452
|
+
@out_eof = true
|
453
|
+
raise Error::UnexpectedEofError.new
|
454
|
+
end
|
390
455
|
end
|
456
|
+
|
457
|
+
break if s[:avail_out] == 0
|
391
458
|
end
|
392
459
|
|
393
460
|
result = out_buffer.read_bytes(out_buffer.size - s[:avail_out])
|
394
|
-
ensure
|
461
|
+
ensure
|
395
462
|
out_buffer.free
|
396
463
|
s[:next_out] = nil
|
397
464
|
s[:avail_out] = 0
|
@@ -400,18 +467,51 @@ module Bzip2
|
|
400
467
|
if @out_eof && result.bytesize == 0
|
401
468
|
nil
|
402
469
|
else
|
470
|
+
@out_pos += result.bytesize
|
403
471
|
result
|
404
|
-
end
|
472
|
+
end
|
473
|
+
end
|
474
|
+
|
475
|
+
# Attempts to reposition the compressed stream to the start of the current
|
476
|
+
# structure. Used when {Libbz2::BZ2_bzDecompress} has read beyond the end
|
477
|
+
# of a bzip2 structure.
|
478
|
+
def attempt_seek_to_structure_start
|
479
|
+
if io.respond_to?(:seek)
|
480
|
+
diff = @structure_start_pos - @in_pos
|
481
|
+
if diff < 0
|
482
|
+
begin
|
483
|
+
io.seek(diff, ::IO::SEEK_CUR)
|
484
|
+
@in_pos += diff
|
485
|
+
rescue IOError
|
486
|
+
end
|
487
|
+
end
|
488
|
+
end
|
489
|
+
end
|
490
|
+
|
491
|
+
# Calls {Libbz2::BZ2_bzDecompressInit} to initialize the decompression
|
492
|
+
# stream `s`.
|
493
|
+
#
|
494
|
+
# Defines a finalizer to ensure that the memory associated with the stream
|
495
|
+
# is deallocated.
|
496
|
+
#
|
497
|
+
# @param s [Libbz2::BzStream] The stream to initialize decompression for.
|
498
|
+
# @raise [Error::Bzip2Error] If {Libbz2::BZ2_bzDecompressInit} reports an
|
499
|
+
# error.
|
500
|
+
def decompress_init(s)
|
501
|
+
check_error(Libbz2::BZ2_bzDecompressInit(s, 0, @small))
|
502
|
+
|
503
|
+
ObjectSpace.define_finalizer(self, self.class.send(:finalize, s))
|
405
504
|
end
|
406
505
|
|
407
|
-
# Calls BZ2_bzDecompressEnd to release
|
408
|
-
# decompression stream `s`.
|
506
|
+
# Calls {Libbz2::BZ2_bzDecompressEnd} to release memory associated with
|
507
|
+
# the decompression stream `s`.
|
409
508
|
#
|
410
509
|
# Notifies `ObjectSpace` that it is no longer necessary to finalize the
|
411
|
-
#
|
510
|
+
# {Reader} instance.
|
412
511
|
#
|
413
512
|
# @param s [Libbz2::BzStream] The stream to end decompression for.
|
414
|
-
# @raise [Error::Bzip2Error] If
|
513
|
+
# @raise [Error::Bzip2Error] If {Libbz2::BZ2_bzDecompressEnd} reports an
|
514
|
+
# error.
|
415
515
|
def decompress_end(s)
|
416
516
|
res = Libbz2::BZ2_bzDecompressEnd(s)
|
417
517
|
ObjectSpace.undefine_finalizer(self)
|