bzip2-ffi 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ require 'ffi'
2
+
3
+ module Bzip2
4
+ module FFI
5
+ # FFI bindings for the libbz2 low-level interface.
6
+ #
7
+ # See bzlib.h and http://bzip.org/docs.html.
8
+ #
9
+ # @private
10
+     module Libbz2 #:nodoc:
11
+       extend ::FFI::Library
12
+       # Candidate names for the libbz2 shared library; presumably tried in order by ffi_lib (confirm in the FFI docs).
13
+       ffi_lib ['bz2', 'libbz2.so.1', 'libbz2.dll']
14
+       # Action codes from bzlib.h for the `action` argument of BZ2_bzCompress.
15
+       BZ_RUN = 0
16
+       BZ_FLUSH = 1
17
+       BZ_FINISH = 2
18
+       # Result codes from bzlib.h: values >= 0 indicate success, negative values are errors.
19
+       BZ_OK = 0
20
+       BZ_RUN_OK = 1
21
+       BZ_FLUSH_OK = 2
22
+       BZ_FINISH_OK = 3
23
+       BZ_STREAM_END = 4
24
+       BZ_SEQUENCE_ERROR = -1
25
+       BZ_PARAM_ERROR = -2
26
+       BZ_MEM_ERROR = -3
27
+       BZ_DATA_ERROR = -4
28
+       BZ_DATA_ERROR_MAGIC = -5
29
+       BZ_CONFIG_ERROR = -9
30
+
31
+       # void *(*bzalloc)(void *,int,int);
32
+       callback :bzalloc, [:pointer, :int, :int], :pointer
33
+
34
+       # void (*bzfree)(void *,void *);
35
+       callback :bzfree, [:pointer, :pointer], :void
36
+       # Ruby-side mirror of bz_stream; field order and types follow the typedef in bzlib.h.
37
+       # typedef struct { ... } bz_stream;
38
+       class BzStream < ::FFI::Struct #:nodoc:
39
+         layout :next_in, :pointer,
40
+           :avail_in, :uint,
41
+           :total_in_lo32, :uint,
42
+           :total_in_hi32, :uint,
43
+
44
+           :next_out, :pointer,
45
+           :avail_out, :uint,
46
+           :total_out_lo32, :uint,
47
+           :total_out_hi32, :uint,
48
+
49
+           :state, :pointer,
50
+
51
+           :bzalloc, :bzalloc,
52
+           :bzfree, :bzfree,
53
+           :opaque, :pointer
54
+       end
55
+
56
+       # int BZ2_bzCompressInit(bz_stream* strm, int blockSize100k, int verbosity, int workFactor);
57
+       attach_function :BZ2_bzCompressInit, [BzStream.by_ref, :int, :int, :int], :int
58
+
59
+       # int BZ2_bzCompress (bz_stream* strm, int action);
60
+       attach_function :BZ2_bzCompress, [BzStream.by_ref, :int], :int
61
+
62
+       # int BZ2_bzCompressEnd (bz_stream* strm);
63
+       attach_function :BZ2_bzCompressEnd, [BzStream.by_ref], :int
64
+
65
+       # int BZ2_bzDecompressInit (bz_stream *strm, int verbosity, int small);
66
+       attach_function :BZ2_bzDecompressInit, [BzStream.by_ref, :int, :int], :int
67
+
68
+       # int BZ2_bzDecompress (bz_stream* strm);
69
+       attach_function :BZ2_bzDecompress, [BzStream.by_ref], :int
70
+
71
+       # int BZ2_bzDecompressEnd (bz_stream *strm);
72
+       attach_function :BZ2_bzDecompressEnd, [BzStream.by_ref], :int
73
+     end
74
+ end
75
+ end
@@ -0,0 +1,422 @@
1
+ require 'pathname'
2
+ require 'stringio'
3
+
4
+ module Bzip2
5
+ module FFI
6
+ # `Reader` reads and decompresses a bzip2 compressed stream or file. The
7
+ # public instance methods of `Reader` are intended to be equivalent to those
8
+ # of a standard `IO` object.
9
+ #
10
+ # Data can be read as a stream using {open} and {#read}, for example:
11
+ #
12
+ # Bzip2::FFI::Reader.open(io_or_path) do |reader|
13
+ # while buffer = reader.read(1024) do
14
+ # # process uncompressed bytes in buffer
15
+ # end
16
+ # end
17
+ #
18
+ # Alternatively, without passing a block to `open`:
19
+ #
20
+ # reader = Bzip2::FFI::Reader.open(io_or_path)
21
+ # begin
22
+ # while buffer = reader.read(1024) do
23
+ # # process uncompressed bytes in buffer
24
+ # end
25
+ # ensure
26
+ # reader.close
27
+ # end
28
+ #
29
+ # An entire bzip2 structure can be read in a single step using {read}:
30
+ #
31
+ # uncompressed = Bzip2::FFI::Reader.read(io_or_path)
32
+ #
33
+ # The {open} and {read} methods accept either an `IO`-like object or a file
34
+ # path. `IO`-like objects must have a `read` method. Paths can be given as
35
+ # either a `String` or `Pathname`.
36
+ #
37
+ # No character conversion is performed on decompressed bytes. The {read} and
38
+ # {#read} methods return instances of `String` that represent the raw
39
+ # decompressed bytes, with `encoding` set to `Encoding::ASCII_8BIT` (also
40
+ # known as `Encoding::BINARY`).
41
+ #
42
+ # `Reader` will read a single bzip2 compressed structure from the given
43
+ # stream or file. If the stream or file contains data beyond the end of
44
+ # the bzip2 structure, such data may be read during decompression. If such
45
+ # an overread has occurred and the `IO`-like object being read from has a
46
+ # `seek` method, `Reader` will use it to reposition the stream to the byte
47
+ # immediately following the end of the bzip2 structure. If `seek` raises
48
+ # an `IOError`, it will be caught and the stream position will be left
49
+ # unchanged.
50
+ class Reader < IO
51
+ # The number of bytes read from the compressed data stream at a time.
52
+ #
53
+ # @private
54
+ READ_BUFFER_SIZE = 4096 #:nodoc:
55
+
56
+ # The number of uncompressed bytes to read at a time when using {#read}
57
+ # without a length.
58
+ #
59
+ # @private
60
+ DEFAULT_DECOMPRESS_COUNT = 4096 #:nodoc:
61
+
62
+ class << self
63
+ # Use send to keep this hidden from YARD (visibility tag does not work).
64
+ send(:public, :new)
65
+
66
+ # Opens a {Reader} to read and decompress data from either an `IO`-like
67
+ # object or a file. `IO`-like objects must have a `read` method. Files
68
+ # can be specified using either a `String` containing the file path or a
69
+ # `Pathname`.
70
+ #
71
+ # If no block is given, the opened `Reader` instance is returned. After
72
+ # use, the instance should be closed using the {#close} method.
73
+ #
74
+ # If a block is given, it will be passed the opened `Reader` instance
75
+ # as an argument. After the block terminates, the `Reader` instance will
76
+ # automatically be closed. `open` will then return the result of the
77
+ # block.
78
+ #
79
+ # The following options can be specified using the `options` `Hash`:
80
+ #
81
+ # * `:autoclose` - When passing an `IO`-like object, set to `true` to
82
+ # close the `IO` when the `Reader` instance is closed.
83
+ # * `:small` - Set to `true` to use an alternative decompression
84
+ # algorithm that uses less memory, but at the cost of
85
+ # decompressing more slowly (roughly 2,300 kB less memory
86
+ # at about half the speed).
87
+ #
88
+ # If an `IO`-like object that has a `binmode` method is passed to
89
+ # `open`, `binmode` will be called on `io_or_path` before yielding to
90
+ # the block or returning.
91
+ #
92
+ # @param io_or_path [Object] Either an `IO`-like object with a `read`
93
+ # method or a file path as a `String` or
94
+ # `Pathname`.
95
+ # @param options [Hash] Optional parameters (`:autoclose` and `:small`).
96
+ # @return [Object] The opened `Reader` instance if no block is given, or
97
+ # the result of the block if a block is given.
98
+ # @raise [ArgumentError] If `io_or_path` is _not_ a `String`, `Pathname`
99
+ # or an `IO`-like object with a `read` method.
100
+ # @raise [Errno::ENOENT] If the specified file does not exist.
101
+ # @raise [Error::Bzip2Error] If an error occurs when initializing
102
+ # libbz2.
103
+ def open(io_or_path, options = {})
104
+ if io_or_path.kind_of?(String) || io_or_path.kind_of?(Pathname)
105
+ options = options.merge(autoclose: true)
106
+ proc = -> { open_bzip_file(io_or_path.to_s, 'rb') }
107
+ super(proc, options)
108
+ elsif !io_or_path.kind_of?(Proc)
109
+ super
110
+ else
111
+ raise ArgumentError, 'io_or_path must be an IO-like object or a path'
112
+ end
113
+ end
114
+
115
+         # Reads and decompresses an entire bzip2 compressed structure from
116
+ # either an `IO`-like object or a file and returns the decompressed
117
+ # bytes as a `String`. `IO`-like objects must have a `read` method.
118
+ # Files can be specified using either a `String` containing the file
119
+ # path or a `Pathname`.
120
+ #
121
+ # The following options can be specified using the `options` `Hash`:
122
+ #
123
+ # * `:autoclose` - When passing an `IO`-like object, set to `true` to
124
+ # close the `IO` when the compressed data has been
125
+ # read.
126
+ # * `:small` - Set to `true` to use an alternative decompression
127
+ # algorithm that uses less memory, but at the cost of
128
+ # decompressing more slowly (roughly 2,300 kB less memory
129
+ # at about half the speed).
130
+ #
131
+ # No character conversion is performed on decompressed bytes. `read`
132
+ # returns a `String` that represents the raw decompressed bytes, with
133
+ # `encoding` set to `Encoding::ASCII_8BIT` (also known as
134
+ # `Encoding::BINARY`).
135
+ #
136
+ # If an `IO`-like object that has a `binmode` method is passed to
137
+ # `read`, `binmode` will be called on `io_or_path` before any compressed
138
+ # data is read.
139
+ #
140
+ # @param io_or_path [Object] Either an `IO`-like object with a `read`
141
+ # method or a file path as a `String` or
142
+ # `Pathname`.
143
+ # @param options [Hash] Optional parameters (`:autoclose` and `:small`).
144
+ # @return [String] The decompressed data.
145
+ # @raise [ArgumentError] If `io_or_path` is _not_ a `String`, `Pathname`
146
+ # or an `IO`-like object with a `read` method.
147
+ # @raise [Errno::ENOENT] If the specified file does not exist.
148
+ # @raise [Error::Bzip2Error] If an error occurs when initializing
149
+ # libbz2 or decompressing data.
150
+ def read(io_or_path, options = {})
151
+ open(io_or_path, options) do |reader|
152
+ reader.read
153
+ end
154
+ end
155
+
156
+ private
157
+
158
+ # Returns a Proc that can be used as a finalizer to call
159
+ # `BZ2_bzDecompressEnd` with the given `stream`.
160
+ #
161
+ # @param stream [Libbz2::BzStream] The stream that should be passed to
162
+ # `BZ2_bzDecompressEnd`.
163
+ def finalize(stream)
164
+ ->(id) do
165
+ Libbz2::BZ2_bzDecompressEnd(stream)
166
+ end
167
+ end
168
+ end
169
+
170
+ # Initializes a {Reader} to read compressed data from an `IO`-like object
171
+ # (`io`). `io` must have a `read` method.
172
+ #
173
+ # The following options can be specified using the `options` `Hash`:
174
+ #
175
+ # * `:autoclose` - Set to `true` to close `io` when the `Reader` instance
176
+ # is closed.
177
+ # * `:small` - Set to `true` to use an alternative decompression
178
+ # algorithm that uses less memory, but at the cost of
179
+ # decompressing more slowly (roughly 2,300 kB less memory
180
+ # at about half the speed).
181
+ #
182
+ # `binmode` is called on `io` if `io` responds to `binmode`.
183
+ #
184
+ # After use, the `Reader` instance should be closed using the {#close}
185
+ # method.
186
+ #
187
+ # @param io [Object] An `IO`-like object with a `read` method.
188
+ # @param options [Hash] Optional parameters (`:autoclose` and `:small`).
189
+ # @raise [ArgumentError] If `io` is `nil` or does not respond to `read`.
190
+ # @raise [Error::Bzip2Error] If an error occurs when initializing libbz2.
191
+ def initialize(io, options = {})
192
+ super
193
+ raise ArgumentError, 'io must respond to read' unless io.respond_to?(:read)
194
+
195
+ small = options[:small]
196
+
197
+ @in_eof = false
198
+ @out_eof = false
199
+ @in_buffer = nil
200
+
201
+ check_error(Libbz2::BZ2_bzDecompressInit(stream, 0, small ? 1 : 0))
202
+
203
+ ObjectSpace.define_finalizer(self, self.class.send(:finalize, stream))
204
+ end
205
+
206
+ # Ends decompression and closes the {Reader}.
207
+ #
208
+ # If the {open} method is used with a block, it is not necessary to call
209
+ # `close`. Otherwise, `close` should be called once the `Reader` is no
210
+ # longer needed.
211
+ #
212
+ # @return [NilType] `nil`.
213
+ # @raise [IOError] If the `Reader` has already been closed.
214
+ def close
215
+ s = stream
216
+
217
+ unless @out_eof
218
+ decompress_end(s)
219
+ end
220
+
221
+ s[:next_in] = nil
222
+ s[:next_out] = nil
223
+
224
+ if @in_buffer
225
+ @in_buffer.free
226
+ @in_buffer = nil
227
+ end
228
+
229
+ super
230
+ end
231
+
232
+ # Reads and decompresses data from the bzip2 compressed stream or file,
233
+ # returning the uncompressed bytes.
234
+ #
235
+ # `length` must be a non-negative integer or `nil`.
236
+ #
237
+ # If `length` is a positive integer, it specifies the maximum number of
238
+ # uncompressed bytes to return. `read` will return `nil` or a `String`
239
+ # with a length of 1 to `length` bytes containing the decompressed data.
240
+ # A result of `nil` or a `String` with a length less than `length` bytes
241
+ # indicates that the end of the decompressed data has been reached.
242
+ #
243
+ # If `length` is `nil`, `read` reads until the end of the decompressed
244
+ # data, returning the uncompressed bytes as a `String`.
245
+ #
246
+ # If `length` is 0, `read` returns an empty `String`.
247
+ #
248
+ # If the optional `buffer` argument is present, it must reference a
249
+ # `String` that will receive the decompressed data. `buffer` will
250
+ # contain only the decompressed data after the call to `read`, even if it
251
+ # is not empty beforehand.
252
+ #
253
+ # No character conversion is performed on decompressed bytes. `read`
254
+ # returns a `String` that represents the raw decompressed bytes, with
255
+ # `encoding` set to `Encoding::ASCII_8BIT` (also known as
256
+ # `Encoding::BINARY`).
257
+ #
258
+ # @param length [Integer] Must be a non-negative integer or `nil`. Set to
259
+ # a positive integer to specify the maximum number
260
+ # of uncompressed bytes to return. Set to `nil` to
261
+ # return the remaining decompressed data. Set to
262
+ # 0 to return an empty `String`.
263
+ # @param buffer [String] An optional buffer to receive the decompressed
264
+ # data.
265
+ # @return [String] The decompressed data as a `String` with ASCII-8BIT
266
+ # encoding, or `nil` if length was a positive integer and
267
+ # the end of the decompressed data has been reached.
268
+ # @raise [ArgumentError] If `length` is negative.
269
+ # @raise [Error::Bzip2Error] If an error occurs during decompression.
270
+ # @raise [IOError] If the `Reader` has been closed.
271
+ def read(length = nil, buffer = nil)
272
+ if buffer
273
+ buffer.clear
274
+ buffer.force_encoding(Encoding::ASCII_8BIT)
275
+ end
276
+
277
+ if length
278
+ raise ArgumentError 'length must be a non-negative integer or nil' if length < 0
279
+
280
+ if length == 0
281
+ check_closed
282
+ return buffer || ''
283
+ end
284
+
285
+ decompressed = decompress(length)
286
+
287
+ return nil unless decompressed
288
+ buffer ? buffer << decompressed : decompressed
289
+ else
290
+ result = buffer ? StringIO.new(buffer) : StringIO.new
291
+
292
+ # StringIO#binmode is a no-op, but call in case it is implemented in
293
+ # future versions.
294
+ result.binmode
295
+
296
+ result.set_encoding(Encoding::ASCII_8BIT)
297
+
298
+ loop do
299
+ decompressed = decompress(DEFAULT_DECOMPRESS_COUNT)
300
+ break unless decompressed
301
+ result.write(decompressed)
302
+ break if decompressed.bytesize < DEFAULT_DECOMPRESS_COUNT
303
+ end
304
+
305
+ result.string
306
+ end
307
+ end
308
+
309
+ private
310
+
311
+ # Attempts to decompress and return `count` bytes.
312
+ #
313
+ # @param count [Integer] The number of uncompressed bytes to return (must
314
+ # be a positive integer).
315
+ # @return [String] The decompressed data as a `String` with ASCII-8BIT
316
+ # encoding, or `nil` if length was a positive integer and
317
+ # the end of the decompressed data has been reached.
318
+ # @raise [ArgumentError] if `count` is not greater than or equal to 1.
319
+ # @raise [Error::Bzip2Error] If an error occurs during decompression.
320
+ # @raise [IOError] If the `Reader` has been closed.
321
+       def decompress(count)
322
+         raise ArgumentError, "count must be a positive integer" unless count >= 1
323
+         s = stream
324
+         return nil if @out_eof
325
+         # Native buffer that libbz2 writes decompressed bytes into; freed in the ensure below.
326
+         out_buffer = ::FFI::MemoryPointer.new(1, count)
327
+         begin
328
+           s[:next_out] = out_buffer
329
+           s[:avail_out] = out_buffer.size
330
+
331
+           # Decompress data until count bytes have been read, or the end of
332
+           # the stream is reached.
333
+           loop do
334
+             if s[:avail_in] == 0 && !@in_eof
335
+               bytes = io.read(READ_BUFFER_SIZE)
336
+               # A short or empty read marks the end of the compressed input.
337
+               if bytes && bytes.bytesize > 0
338
+                 @in_eof = bytes.bytesize < READ_BUFFER_SIZE
339
+                 @in_buffer = ::FFI::MemoryPointer.new(1, bytes.bytesize)
340
+                 @in_buffer.write_bytes(bytes)
341
+                 s[:next_in] = @in_buffer
342
+                 s[:avail_in] = @in_buffer.size
343
+               else
344
+                 @in_eof = true
345
+               end
346
+             end
347
+             # Remember the free output space to detect a call that makes no progress.
348
+             prev_avail_out = s[:avail_out]
349
+
350
+             res = Libbz2::BZ2_bzDecompress(s)
351
+             # Once all buffered input has been consumed, release the native input buffer.
352
+             if s[:avail_in] == 0 && @in_buffer
353
+               s[:next_in] = nil
354
+               @in_buffer.free
355
+               @in_buffer = nil
356
+             end
357
+             # check_error is defined elsewhere; presumably raises for negative libbz2 results.
358
+             check_error(res)
359
+
360
+             if res == Libbz2::BZ_STREAM_END
361
+               # The input could contain data after the end of the bzip2 stream;
362
+               # s[:avail_in] is the number of bytes that have been read from io
363
+               # but not consumed by BZ2_bzDecompress. Attempt to move the input
364
+               # stream back by the amount that has been over-read.
365
+               #
366
+               # NOTE(review): the `rescue` modifier below rescues any StandardError;
367
+               # `IOError` is only the value of the expression, not a filter.
368
+               if s[:avail_in] > 0 && io.respond_to?(:seek)
369
+                 io.seek(-s[:avail_in], ::IO::SEEK_CUR) rescue IOError
370
+               end
371
+
372
+               if @in_buffer
373
+                 s[:next_in] = nil
374
+                 @in_buffer.free
375
+                 @in_buffer = nil
376
+               end
377
+
378
+               decompress_end(s)
379
+
380
+               @out_eof = true
381
+               break
382
+             end
383
+             # The output buffer is full, so count bytes are available to return.
384
+             break if s[:avail_out] == 0
385
+
386
+             # No more input available and calling BZ2_bzDecompress didn't
387
+             # advance the output. Raise an error.
388
+             if @in_eof && prev_avail_out == s[:avail_out]
389
+               raise Error::UnexpectedEofError.new
390
+             end
391
+           end
392
+           # Copy out only the part of the output buffer that was filled.
393
+           result = out_buffer.read_bytes(out_buffer.size - s[:avail_out])
394
+         ensure
395
+           out_buffer.free
396
+           s[:next_out] = nil
397
+           s[:avail_out] = 0
398
+         end
399
+         # nil tells the caller that the end of the data was reached with nothing left to return.
400
+         if @out_eof && result.bytesize == 0
401
+           nil
402
+         else
403
+           result
404
+         end
405
+       end
406
+
407
+       # Calls BZ2_bzDecompressEnd to release memory associated with the
408
+ # decompression stream `s`.
409
+ #
410
+ # Notifies `ObjectSpace` that it is no longer necessary to finalize the
411
+ # `Reader` instance.
412
+ #
413
+ # @param s [Libbz2::BzStream] The stream to end decompression for.
414
+ # @raise [Error::Bzip2Error] If `BZ2_bzDecompressEnd` reports an error.
415
+ def decompress_end(s)
416
+ res = Libbz2::BZ2_bzDecompressEnd(s)
417
+ ObjectSpace.undefine_finalizer(self)
418
+ check_error(res)
419
+ end
420
+ end
421
+ end
422
+ end