bzip2-ffi 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,75 @@
1
+ require 'ffi'
2
+
3
+ module Bzip2
4
+ module FFI
5
+ # FFI bindings for the libbz2 low-level interface.
6
+ #
7
+ # See bzlib.h and http://bzip.org/docs.html.
8
+ #
9
+ # @private
10
+ module Libbz2 #:nodoc:
11
+ extend ::FFI::Library
12
+
13
+ ffi_lib ['bz2', 'libbz2.so.1', 'libbz2.dll'] # Alternative names for the same library; the FFI loader tries each until one loads.
14
+
15
+ BZ_RUN = 0 # BZ_RUN, BZ_FLUSH and BZ_FINISH: values for the `action` argument of BZ2_bzCompress (see bzlib.h).
16
+ BZ_FLUSH = 1
17
+ BZ_FINISH = 2
18
+
19
+ BZ_OK = 0 # Return codes start here: the non-negative values report status, the negative values are the *_ERROR codes.
20
+ BZ_RUN_OK = 1
21
+ BZ_FLUSH_OK = 2
22
+ BZ_FINISH_OK = 3
23
+ BZ_STREAM_END = 4
24
+ BZ_SEQUENCE_ERROR = -1
25
+ BZ_PARAM_ERROR = -2
26
+ BZ_MEM_ERROR = -3
27
+ BZ_DATA_ERROR = -4
28
+ BZ_DATA_ERROR_MAGIC = -5
29
+ BZ_CONFIG_ERROR = -9 # Codes -6 to -8 are not bound in this module.
30
+
31
+ # void *(*bzalloc)(void *,int,int);
32
+ callback :bzalloc, [:pointer, :int, :int], :pointer # Callback type used for the :bzalloc field of BzStream.
33
+
34
+ # void (*bzfree)(void *,void *);
35
+ callback :bzfree, [:pointer, :pointer], :void # Callback type used for the :bzfree field of BzStream.
36
+
37
+ # typedef struct { ... } bz_stream;
38
+ class BzStream < ::FFI::Struct #:nodoc:
39
+ layout :next_in, :pointer,
40
+ :avail_in, :uint,
41
+ :total_in_lo32, :uint,
42
+ :total_in_hi32, :uint,
43
+
44
+ :next_out, :pointer,
45
+ :avail_out, :uint,
46
+ :total_out_lo32, :uint,
47
+ :total_out_hi32, :uint,
48
+
49
+ :state, :pointer,
50
+
51
+ :bzalloc, :bzalloc,
52
+ :bzfree, :bzfree,
53
+ :opaque, :pointer
54
+ end
55
+
56
+ # int BZ2_bzCompressInit(bz_stream* strm, int blockSize100k, int verbosity, int workFactor);
57
+ attach_function :BZ2_bzCompressInit, [BzStream.by_ref, :int, :int, :int], :int
58
+
59
+ # int BZ2_bzCompress (bz_stream* strm, int action);
60
+ attach_function :BZ2_bzCompress, [BzStream.by_ref, :int], :int
61
+
62
+ # int BZ2_bzCompressEnd (bz_stream* strm);
63
+ attach_function :BZ2_bzCompressEnd, [BzStream.by_ref], :int
64
+
65
+ # int BZ2_bzDecompressInit (bz_stream *strm, int verbosity, int small);
66
+ attach_function :BZ2_bzDecompressInit, [BzStream.by_ref, :int, :int], :int
67
+
68
+ # int BZ2_bzDecompress (bz_stream* strm);
69
+ attach_function :BZ2_bzDecompress, [BzStream.by_ref], :int
70
+
71
+ # int BZ2_bzDecompressEnd (bz_stream *strm);
72
+ attach_function :BZ2_bzDecompressEnd, [BzStream.by_ref], :int
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,422 @@
1
+ require 'pathname'
2
+ require 'stringio'
3
+
4
+ module Bzip2
5
+ module FFI
6
+ # `Reader` reads and decompresses a bzip2 compressed stream or file. The
7
+ # public instance methods of `Reader` are intended to be equivalent to those
8
+ # of a standard `IO` object.
9
+ #
10
+ # Data can be read as a stream using {open} and {#read}, for example:
11
+ #
12
+ # Bzip2::FFI::Reader.open(io_or_path) do |reader|
13
+ # while buffer = reader.read(1024) do
14
+ # # process uncompressed bytes in buffer
15
+ # end
16
+ # end
17
+ #
18
+ # Alternatively, without passing a block to `open`:
19
+ #
20
+ # reader = Bzip2::FFI::Reader.open(io_or_path)
21
+ # begin
22
+ # while buffer = reader.read(1024) do
23
+ # # process uncompressed bytes in buffer
24
+ # end
25
+ # ensure
26
+ # reader.close
27
+ # end
28
+ #
29
+ # An entire bzip2 structure can be read in a single step using {read}:
30
+ #
31
+ # uncompressed = Bzip2::FFI::Reader.read(io_or_path)
32
+ #
33
+ # The {open} and {read} methods accept either an `IO`-like object or a file
34
+ # path. `IO`-like objects must have a `read` method. Paths can be given as
35
+ # either a `String` or `Pathname`.
36
+ #
37
+ # No character conversion is performed on decompressed bytes. The {read} and
38
+ # {#read} methods return instances of `String` that represent the raw
39
+ # decompressed bytes, with `encoding` set to `Encoding::ASCII_8BIT` (also
40
+ # known as `Encoding::BINARY`).
41
+ #
42
+ # `Reader` will read a single bzip2 compressed structure from the given
43
+ # stream or file. If the stream or file contains data beyond the end of
44
+ # the bzip2 structure, such data may be read during decompression. If such
45
+ # an overread has occurred and the `IO`-like object being read from has a
46
+ # `seek` method, `Reader` will use it to reposition the stream to the byte
47
+ # immediately following the end of the bzip2 structure. If `seek` raises
48
+ # an `IOError`, it will be caught and the stream position will be left
49
+ # unchanged.
50
+ class Reader < IO
51
+ # The number of bytes read from the compressed data stream at a time.
52
+ #
53
+ # @private
54
+ READ_BUFFER_SIZE = 4096 #:nodoc:
55
+
56
+ # The number of uncompressed bytes to read at a time when using {#read}
57
+ # without a length.
58
+ #
59
+ # @private
60
+ DEFAULT_DECOMPRESS_COUNT = 4096 #:nodoc:
61
+
62
+ class << self
63
+ # Use send to keep this hidden from YARD (visibility tag does not work).
64
+ send(:public, :new)
65
+
66
+ # Opens a {Reader} to read and decompress data from either an `IO`-like
67
+ # object or a file. `IO`-like objects must have a `read` method. Files
68
+ # can be specified using either a `String` containing the file path or a
69
+ # `Pathname`.
70
+ #
71
+ # If no block is given, the opened `Reader` instance is returned. After
72
+ # use, the instance should be closed using the {#close} method.
73
+ #
74
+ # If a block is given, it will be passed the opened `Reader` instance
75
+ # as an argument. After the block terminates, the `Reader` instance will
76
+ # automatically be closed. `open` will then return the result of the
77
+ # block.
78
+ #
79
+ # The following options can be specified using the `options` `Hash`:
80
+ #
81
+ # * `:autoclose` - When passing an `IO`-like object, set to `true` to
82
+ # close the `IO` when the `Reader` instance is closed.
83
+ # * `:small` - Set to `true` to use an alternative decompression
84
+ # algorithm that uses less memory, but at the cost of
85
+ # decompressing more slowly (roughly 2,300 kB less memory
86
+ # at about half the speed).
87
+ #
88
+ # If an `IO`-like object that has a `binmode` method is passed to
89
+ # `open`, `binmode` will be called on `io_or_path` before yielding to
90
+ # the block or returning.
91
+ #
92
+ # @param io_or_path [Object] Either an `IO`-like object with a `read`
93
+ # method or a file path as a `String` or
94
+ # `Pathname`.
95
+ # @param options [Hash] Optional parameters (`:autoclose` and `:small`).
96
+ # @return [Object] The opened `Reader` instance if no block is given, or
97
+ # the result of the block if a block is given.
98
+ # @raise [ArgumentError] If `io_or_path` is _not_ a `String`, `Pathname`
99
+ # or an `IO`-like object with a `read` method.
100
+ # @raise [Errno::ENOENT] If the specified file does not exist.
101
+ # @raise [Error::Bzip2Error] If an error occurs when initializing
102
+ # libbz2.
103
def open(io_or_path, options = {})
  # Dispatches on the kind of source: paths are opened lazily through a
  # lambda (and always auto-closed), a Proc is rejected, and anything else
  # is handed to the superclass unchanged as an IO-like object. Any block
  # given is forwarded to the superclass implementation in both cases.
  case io_or_path
  when String, Pathname
    opener = -> { open_bzip_file(io_or_path.to_s, 'rb') }
    super(opener, options.merge(autoclose: true))
  when Proc
    raise ArgumentError, 'io_or_path must be an IO-like object or a path'
  else
    super
  end
end
114
+
115
+ # Reads and decompresses an entire bzip2 compressed structure from
116
+ # either an `IO`-like object or a file and returns the decompressed
117
+ # bytes as a `String`. `IO`-like objects must have a `read` method.
118
+ # Files can be specified using either a `String` containing the file
119
+ # path or a `Pathname`.
120
+ #
121
+ # The following options can be specified using the `options` `Hash`:
122
+ #
123
+ # * `:autoclose` - When passing an `IO`-like object, set to `true` to
124
+ # close the `IO` when the compressed data has been
125
+ # read.
126
+ # * `:small` - Set to `true` to use an alternative decompression
127
+ # algorithm that uses less memory, but at the cost of
128
+ # decompressing more slowly (roughly 2,300 kB less memory
129
+ # at about half the speed).
130
+ #
131
+ # No character conversion is performed on decompressed bytes. `read`
132
+ # returns a `String` that represents the raw decompressed bytes, with
133
+ # `encoding` set to `Encoding::ASCII_8BIT` (also known as
134
+ # `Encoding::BINARY`).
135
+ #
136
+ # If an `IO`-like object that has a `binmode` method is passed to
137
+ # `read`, `binmode` will be called on `io_or_path` before any compressed
138
+ # data is read.
139
+ #
140
+ # @param io_or_path [Object] Either an `IO`-like object with a `read`
141
+ # method or a file path as a `String` or
142
+ # `Pathname`.
143
+ # @param options [Hash] Optional parameters (`:autoclose` and `:small`).
144
+ # @return [String] The decompressed data.
145
+ # @raise [ArgumentError] If `io_or_path` is _not_ a `String`, `Pathname`
146
+ # or an `IO`-like object with a `read` method.
147
+ # @raise [Errno::ENOENT] If the specified file does not exist.
148
+ # @raise [Error::Bzip2Error] If an error occurs when initializing
149
+ # libbz2 or decompressing data.
150
def read(io_or_path, options = {})
  # One-shot helper: opens a reader for the source, reads everything from
  # it and returns that result; `open` takes care of closing the reader.
  open(io_or_path, options, &:read)
end
155
+
156
+ private
157
+
158
+ # Returns a Proc that can be used as a finalizer to call
159
+ # `BZ2_bzDecompressEnd` with the given `stream`.
160
+ #
161
+ # @param stream [Libbz2::BzStream] The stream that should be passed to
162
+ # `BZ2_bzDecompressEnd`.
163
def finalize(stream)
  # The returned lambda closes over `stream` only (never the Reader), and
  # ignores the object id argument that a finalizer is called with.
  lambda { |_object_id| Libbz2.BZ2_bzDecompressEnd(stream) }
end
168
+ end
169
+
170
+ # Initializes a {Reader} to read compressed data from an `IO`-like object
171
+ # (`io`). `io` must have a `read` method.
172
+ #
173
+ # The following options can be specified using the `options` `Hash`:
174
+ #
175
+ # * `:autoclose` - Set to `true` to close `io` when the `Reader` instance
176
+ # is closed.
177
+ # * `:small` - Set to `true` to use an alternative decompression
178
+ # algorithm that uses less memory, but at the cost of
179
+ # decompressing more slowly (roughly 2,300 kB less memory
180
+ # at about half the speed).
181
+ #
182
+ # `binmode` is called on `io` if `io` responds to `binmode`.
183
+ #
184
+ # After use, the `Reader` instance should be closed using the {#close}
185
+ # method.
186
+ #
187
+ # @param io [Object] An `IO`-like object with a `read` method.
188
+ # @param options [Hash] Optional parameters (`:autoclose` and `:small`).
189
+ # @raise [ArgumentError] If `io` is `nil` or does not respond to `read`.
190
+ # @raise [Error::Bzip2Error] If an error occurs when initializing libbz2.
191
def initialize(io, options = {})
  # The superclass constructor runs first with the same arguments.
  super

  unless io.respond_to?(:read)
    raise ArgumentError, 'io must respond to read'
  end

  # libbz2 takes `small` as an int flag: 1 when the option is set, else 0.
  small_flag = options[:small] ? 1 : 0

  # Nothing has been read or produced yet, and no input buffer is held.
  @in_eof = false
  @out_eof = false
  @in_buffer = nil

  # Verbosity is fixed at 0.
  check_error(Libbz2.BZ2_bzDecompressInit(stream, 0, small_flag))

  # Safety net: end decompression if the Reader is collected unclosed.
  cleanup = self.class.send(:finalize, stream)
  ObjectSpace.define_finalizer(self, cleanup)
end
205
+
206
+ # Ends decompression and closes the {Reader}.
207
+ #
208
+ # If the {open} method is used with a block, it is not necessary to call
209
+ # `close`. Otherwise, `close` should be called once the `Reader` is no
210
+ # longer needed.
211
+ #
212
+ # @return [NilType] `nil`.
213
+ # @raise [IOError] If the `Reader` has already been closed.
214
def close
  bz = stream

  # Decompression has already been ended if the end of the bzip2
  # structure was reached; otherwise end it now.
  decompress_end(bz) unless @out_eof

  # Drop the struct's references to the input and output buffers.
  [:next_in, :next_out].each { |field| bz[field] = nil }

  # Free any input buffer still held from a partially consumed read.
  pending_input = @in_buffer
  if pending_input
    pending_input.free
    @in_buffer = nil
  end

  super
end
231
+
232
+ # Reads and decompresses data from the bzip2 compressed stream or file,
233
+ # returning the uncompressed bytes.
234
+ #
235
+ # `length` must be a non-negative integer or `nil`.
236
+ #
237
+ # If `length` is a positive integer, it specifies the maximum number of
238
+ # uncompressed bytes to return. `read` will return `nil` or a `String`
239
+ # with a length of 1 to `length` bytes containing the decompressed data.
240
+ # A result of `nil` or a `String` with a length less than `length` bytes
241
+ # indicates that the end of the decompressed data has been reached.
242
+ #
243
+ # If `length` is `nil`, `read` reads until the end of the decompressed
244
+ # data, returning the uncompressed bytes as a `String`.
245
+ #
246
+ # If `length` is 0, `read` returns an empty `String`.
247
+ #
248
+ # If the optional `buffer` argument is present, it must reference a
249
+ # `String` that will receive the decompressed data. `buffer` will
250
+ # contain only the decompressed data after the call to `read`, even if it
251
+ # is not empty beforehand.
252
+ #
253
+ # No character conversion is performed on decompressed bytes. `read`
254
+ # returns a `String` that represents the raw decompressed bytes, with
255
+ # `encoding` set to `Encoding::ASCII_8BIT` (also known as
256
+ # `Encoding::BINARY`).
257
+ #
258
+ # @param length [Integer] Must be a non-negative integer or `nil`. Set to
259
+ # a positive integer to specify the maximum number
260
+ # of uncompressed bytes to return. Set to `nil` to
261
+ # return the remaining decompressed data. Set to
262
+ # 0 to return an empty `String`.
263
+ # @param buffer [String] An optional buffer to receive the decompressed
264
+ # data.
265
+ # @return [String] The decompressed data as a `String` with ASCII-8BIT
266
+ # encoding, or `nil` if length was a positive integer and
267
+ # the end of the decompressed data has been reached.
268
+ # @raise [ArgumentError] If `length` is negative.
269
+ # @raise [Error::Bzip2Error] If an error occurs during decompression.
270
+ # @raise [IOError] If the `Reader` has been closed.
271
def read(length = nil, buffer = nil)
  # Reads up to `length` decompressed bytes (or everything remaining when
  # `length` is nil), optionally into `buffer`. Returns a binary String,
  # or nil when a positive `length` was requested at the end of the data.
  # Raises ArgumentError for a negative `length`.

  # A caller-supplied buffer only ever holds the data from this call.
  if buffer
    buffer.clear
    buffer.force_encoding(Encoding::ASCII_8BIT)
  end

  if length
    # The comma after ArgumentError matters: without it Ruby parses this as
    # a call to a method named ArgumentError and raises NoMethodError.
    raise ArgumentError, 'length must be a non-negative integer or nil' if length < 0

    if length == 0
      check_closed
      # String.new (rather than a '' literal) yields a mutable String with
      # ASCII-8BIT encoding, matching every other result of this method.
      return buffer || String.new
    end

    decompressed = decompress(length)

    return nil unless decompressed
    buffer ? buffer << decompressed : decompressed
  else
    result = buffer || String.new

    # Pull fixed-size chunks until the decompressor reports the end, either
    # with nil or with a chunk shorter than requested.
    loop do
      chunk = decompress(DEFAULT_DECOMPRESS_COUNT)
      break unless chunk
      result << chunk
      break if chunk.bytesize < DEFAULT_DECOMPRESS_COUNT
    end

    result
  end
end
308
+
309
+ private
310
+
311
+ # Attempts to decompress and return `count` bytes.
312
+ #
313
+ # @param count [Integer] The number of uncompressed bytes to return (must
314
+ # be a positive integer).
315
+ # @return [String] The decompressed data as a `String` with ASCII-8BIT
316
+ # encoding, or `nil` if length was a positive integer and
317
+ # the end of the decompressed data has been reached.
318
+ # @raise [ArgumentError] if `count` is not greater than or equal to 1.
319
+ # @raise [Error::Bzip2Error] If an error occurs during decompression.
320
+ # @raise [IOError] If the `Reader` has been closed.
321
def decompress(count)
  # Produces up to `count` decompressed bytes, refilling the input from
  # `io` as needed. Returns nil once the end of the bzip2 structure has
  # been reached and no further bytes were produced.
  raise ArgumentError, "count must be a positive integer" unless count >= 1
  bz = stream
  return nil if @out_eof

  output = ::FFI::MemoryPointer.new(1, count)
  begin
    bz[:next_out] = output
    bz[:avail_out] = output.size

    # Keep decompressing until the output is full or the stream ends.
    loop do
      refill_input(bz) if bz[:avail_in] == 0 && !@in_eof

      avail_out_before = bz[:avail_out]
      status = Libbz2.BZ2_bzDecompress(bz)

      # Release a fully consumed input buffer before checking the status,
      # so it is freed even when check_error raises.
      release_input(bz) if bz[:avail_in] == 0

      check_error(status)

      if status == Libbz2::BZ_STREAM_END
        # Bytes left in avail_in were read from io but belong to whatever
        # follows the bzip2 structure; try to hand them back.
        rewind_overread(bz)
        release_input(bz)
        decompress_end(bz)

        @out_eof = true
        break
      end

      break if bz[:avail_out] == 0

      # Input is exhausted and the last call produced nothing: the
      # compressed data is truncated.
      if @in_eof && avail_out_before == bz[:avail_out]
        raise Error::UnexpectedEofError.new
      end
    end

    produced = output.read_bytes(output.size - bz[:avail_out])
  ensure
    output.free
    bz[:next_out] = nil
    bz[:avail_out] = 0
  end

  (@out_eof && produced.bytesize == 0) ? nil : produced
end

# Reads the next chunk of compressed data from io into a fresh native
# buffer and points the stream at it; marks end of input on a short or
# empty read.
def refill_input(bz)
  chunk = io.read(READ_BUFFER_SIZE)

  if chunk && chunk.bytesize > 0
    @in_eof = chunk.bytesize < READ_BUFFER_SIZE
    @in_buffer = ::FFI::MemoryPointer.new(1, chunk.bytesize)
    @in_buffer.write_bytes(chunk)
    bz[:next_in] = @in_buffer
    bz[:avail_in] = @in_buffer.size
  else
    @in_eof = true
  end
end

# Frees the current native input buffer, if any, and detaches it from the
# stream.
def release_input(bz)
  return unless @in_buffer

  bz[:next_in] = nil
  @in_buffer.free
  @in_buffer = nil
end

# Moves io back by the number of bytes read past the end of the bzip2
# structure, when io supports seek.
def rewind_overread(bz)
  return unless bz[:avail_in] > 0 && io.respond_to?(:seek)

  begin
    io.seek(-bz[:avail_in], ::IO::SEEK_CUR)
  rescue StandardError
    # Best effort: repositioning failures are ignored and the stream is
    # left wherever it is. Any StandardError is swallowed, not just
    # IOError; this also covers Errno::* errors from non-seekable streams.
  end
end
406
+
407
+ # Calls BZ2_bzDecompressEnd to release memory associated with the
408
+ # decompression stream `s`.
409
+ #
410
+ # Notifies `ObjectSpace` that it is no longer necessary to finalize the
411
+ # `Reader` instance.
412
+ #
413
+ # @param s [Libbz2::BzStream] The stream to end decompression for.
414
+ # @raise [Error::Bzip2Error] If `BZ2_bzDecompressEnd` reports an error.
415
def decompress_end(s)
  status = Libbz2.BZ2_bzDecompressEnd(s)

  # The finalizer is removed before the status is checked, so it is
  # dropped even when check_error goes on to raise.
  ObjectSpace.undefine_finalizer(self)

  check_error(status)
end
420
+ end
421
+ end
422
+ end