ruby-xz 0.2.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/xz.rb CHANGED
@@ -1,10 +1,10 @@
1
1
  # -*- coding: utf-8 -*-
2
- # (The MIT License)
3
- #
2
+ #--
4
3
  # Basic liblzma-bindings for Ruby.
5
4
  #
6
- # Copyright © 2011,2012 Marvin Gülker
7
- # Copyright © 2011 Christoph Plank
5
+ # Copyright © 2011-2018 Marvin Gülker et al.
6
+ #
7
+ # See AUTHORS for the full list of contributors.
8
8
  #
9
9
  # Permission is hereby granted, free of charge, to any person obtaining a
10
10
  # copy of this software and associated documentation files (the ‘Software’),
@@ -23,144 +23,288 @@
23
23
  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
24
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
25
  # THE SOFTWARE.
26
+ #++
26
27
 
27
28
  require "pathname"
28
- require "ffi"
29
- require 'stringio'
30
- require "io/like"
31
-
32
- #The namespace and main module of this library. Each method of this module
33
- #may raise exceptions of class XZ::LZMAError, which is not named in the
34
- #methods' documentations anymore.
35
- #
36
- #All strings you receive from any method defined in this module
37
- #and the classes defined in it are encoded in BINARY, so you may
38
- #have to call #force_encoding on them to tag them with the correct
39
- #encoding (assuming you _know_ what their correct encoding should be).
40
- #ruby-xz can’t handle this as compiled strings don’t come with encoding
41
- #information.
29
+ require "fiddle"
30
+ require "fiddle/import"
31
+ require "stringio"
32
+ require "forwardable"
33
+
34
+ # The namespace and main module of this library. Each method of this
35
+ # module may raise exceptions of class XZ::LZMAError, which is not
36
+ # named in the methods' documentations anymore.
42
37
  module XZ
43
- #The version of this library.
44
- VERSION = "0.2.1"
45
38
 
46
- #Number of bytes read in one chunk.
39
+ # Number of bytes read in one chunk.
47
40
  CHUNK_SIZE = 4096
48
41
 
49
42
  class << self
50
43
 
51
- #call-seq:
52
- # decompress_stream(io [, memory_limit [, flags ] ] ) → a_string
53
- # decompress_stream(io [, memory_limit [, flags ] ] ){|chunk| ... } → an_integer
54
- # decode_stream(io [, memory_limit [, flags ] ] ) → a_string
55
- # decode_stream(io [, memory_limit [, flags ] ] ){|chunk| ... } → an_integer
56
- #
57
- #Decompresses a stream containing XZ-compressed data.
58
- #===Parameters
59
- #[io] The IO to read from. It must be opened for reading.
60
- #[memory_limit] (+UINT64_MAX+) If not XZ::LibLZMA::UINT64_MAX, makes liblzma
61
- # use no more memory than +memory_limit+ bytes.
62
- #[flags] (<tt>[:tell_unsupported_check]</tt>) Additional flags
63
- # passed to liblzma (an array). Possible flags are:
64
- # [:tell_no_check] Spit out a warning if the archive hasn't an
65
- # integrity checksum.
66
- # [:tell_unsupported_check] Spit out a warning if the archive
67
- # has an unsupported checksum type.
68
- # [:concatenated] Decompress concatenated archives.
69
- #[chunk] (Block argument) One piece of decompressed data.
70
- #===Return value
71
- #If a block was given, returns the number of bytes written. Otherwise,
72
- #returns the decompressed data as a BINARY-encoded string.
73
- #===Example
74
- # data = File.open("archive.xz", "rb"){|f| f.read}
75
- # io = StringIO.new(data)
76
- # XZ.decompress_stream(io) #=> "I AM THE DATA"
77
- # io.rewind
78
- # str = ""
79
- # XZ.decompress_stream(io, XZ::LibLZMA::UINT64_MAX, [:tell_no_check]){|c| str << c} #=> 13
80
- # str #=> "I AM THE DATA"
81
- #===Remarks
82
- #The block form is *much* better on memory usage, because it doesn't have
83
- #to load everything into RAM at once. If you don't know how big your
84
- #data gets or if you want to decompress much data, use the block form. Of
85
- #course you shouldn't store the data you read in RAM then as in the
86
- #example above.
87
- def decompress_stream(io, memory_limit = LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check], &block)
88
- raise(ArgumentError, "Invalid memory limit set!") unless (0..LibLZMA::UINT64_MAX).include?(memory_limit)
89
- flags.each do |flag|
90
- raise(ArgumentError, "Unknown flag #{flag}!") unless [:tell_no_check, :tell_unsupported_check, :tell_any_check, :concatenated].include?(flag)
44
+ # Force ruby-xz to be silent about deprecations. Using this is
45
+ # discouraged so that you are aware of upcoming changes to the
46
+ # API. However, if your standard error stream is closed,
47
+ # outputting the deprecation notices might result in an exception,
48
+ # so this method allows you to suppress these notices. Ensure you
49
+ # read the HISTORY.rdoc file carefully instead.
50
+ def disable_deprecation_notices=(bool)
51
+ @disable_deprecation_notices = bool
52
+ end
53
+
54
+ # Output a deprecation notice.
55
+ def deprecate(msg) # :nodoc:
56
+ @disable_deprecation_notices ||= false
57
+
58
+ unless @disable_deprecation_notices
59
+ $stderr.puts("DEPRECATION NOTICE: #{msg}\n#{caller.drop(1).join("\n\t")}")
91
60
  end
61
+ end
92
62
 
93
- stream = LZMAStream.new
94
- res = LibLZMA.lzma_stream_decoder(
95
- stream.pointer,
96
- memory_limit,
97
- flags.inject(0){|val, flag| val | LibLZMA.const_get(:"LZMA_#{flag.to_s.upcase}")}
98
- )
63
+ # call-seq:
64
+ # decompress_stream(io [, kw ] ) → a_string
65
+ # decompress_stream(io [, kw ] ){|chunk| ... } → an_integer
66
+ # decode_stream(io [, kw ] ) → a_string
67
+ # decode_stream(io [, kw ] ){|chunk| ... } → an_integer
68
+ #
69
+ # Decompresses a stream containing XZ-compressed data.
70
+ #
71
+ # === Parameters
72
+ # ==== Positional parameters
73
+ #
74
+ # [io]
75
+ # The IO to read from. It must be opened for reading in
76
+ # binary mode.
77
+ # [chunk (Block argument)]
78
+ # One piece of decompressed data. See Remarks section below
79
+ # for information about its encoding.
80
+ #
81
+ # ==== Keyword arguments
82
+ #
83
+ # [memory_limit (+UINT64_MAX+)]
84
+ # If not XZ::LibLZMA::UINT64_MAX, makes liblzma
85
+ # use no more memory than +memory_limit+ bytes.
86
+ #
87
+ # [flags (<tt>[:tell_unsupported_check]</tt>)]
88
+ # Additional flags
89
+ # passed to liblzma (an array). Possible flags are:
90
+ #
91
+ # [:tell_no_check]
92
+ # Spit out a warning if the archive has no
93
+ # integrity checksum.
94
+ # [:tell_unsupported_check]
95
+ # Spit out a warning if the archive
96
+ # has an unsupported checksum type.
97
+ # [:concatenated]
98
+ # Decompress concatenated archives.
99
+ # [external_encoding (Encoding.default_external)]
100
+ # Assume the decompressed data inside the compressed data
101
+ # has this encoding. See Remarks section.
102
+ # [internal_encoding (Encoding.default_internal)]
103
+ # Request transcoding of the decompressed data into this
104
+ # encoding if not nil. Note that Encoding.default_internal
105
+ # is nil by default. See Remarks section.
106
+ #
107
+ # === Return value
108
+ #
109
+ # If a block was given, returns the number of bytes
110
+ # written. Otherwise, returns the decompressed data as a
111
+ # string (see Remarks section below for its encoding).
112
+ #
113
+ # === Raises
114
+ #
115
+ # [Encoding::InvalidByteSequenceError]
116
+ # 1. You requested an “internal encoding” conversion
117
+ # and the archive contains invalid byte sequences
118
+ # in the external encoding.
119
+ # 2. You requested an “internal encoding” conversion, used
120
+ # the block form of this method, and liblzma decided
121
+ # to cut the decompressed data into chunks in the middle of
122
+ # a multibyte character. See Remarks section for an
123
+ # explanation.
124
+ #
125
+ # === Example
126
+ #
127
+ # data = File.open("archive.xz", "rb"){|f| f.read}
128
+ # io = StringIO.new(data)
129
+ #
130
+ # XZ.decompress_stream(io) #=> "I AM THE DATA"
131
+ # io.rewind
132
+ #
133
+ # str = ""
134
+ # XZ.decompress_stream(io, memory_limit: XZ::LibLZMA::UINT64_MAX, flags: [:tell_no_check]){|c| str << c} #=> 13
135
+ # str #=> "I AM THE DATA"
136
+ #
137
+ # === Remarks
138
+ #
139
+ # The block form is *much* better on memory usage, because it
140
+ # doesn't have to load everything into RAM at once. If you don't
141
+ # know how big your data gets or if you want to decompress much
142
+ # data, use the block form. Of course you shouldn't store the data
143
+ # you read in RAM then as in the example above.
144
+ #
145
+ # This method honours Ruby's external and internal encoding concept.
146
+ # All documentation about this applies to this method, with the
147
+ # exception that the external encoding does not refer to the data
148
+ # on the hard disk (that's compressed XZ data, it's always binary),
149
+ # but to the data inside the XZ container, i.e. to the *decompressed*
150
+ # data. Any strings you receive from this method (regardless of
151
+ # whether via return value or via the +chunk+ block argument) will
152
+ # first be tagged with the external encoding. If you set an internal
153
+ # encoding (either via the +internal_encoding+ parameter or via
154
+ # Ruby's default internal encoding) that string will be transcoded
155
+ # from the external encoding to the internal encoding before you
156
+ # even see it; in that case, the return value or chunk block argument
157
+ # will be encoded in the internal encoding. Internal encoding is
158
+ # disabled in Ruby by default and the argument for this method also
159
+ # defaults to nil.
160
+ #
161
+ # Due to the external encoding being applied, it can happen that
162
+ # +chunk+ contains an incomplete multibyte character causing
163
+ # <tt>valid_encoding?</tt> to return false if called on +chunk+,
164
+ # because liblzma doesn't know about encodings. The rest of the
165
+ # character will be yielded to the block in the next iteration
166
+ # then as liblzma progresses with the decompression from the XZ
167
+ # format. In other words, be prepared that +chunk+ can contain
168
+ # incomplete multibyte chars.
169
+ #
170
+ # This can have nasty side effects if you requested an internal
171
+ # encoding automatic transcoding and used the block form. Since
172
+ # this method applies the internal encoding transcoding before the
173
+ # chunk is yielded to the block, String#encode gets the incomplete
174
+ # multibyte character. In that case, you will receive an
175
+ # Encoding::InvalidByteSequenceError exception even though your
176
+ # data is perfectly well-formed inside the XZ data. It's just
177
+ # that liblzma during decompression cut the chunks at an
178
+ # unfortunate place. To avoid this, do not request internal encoding
179
+ # conversion when using the block form, but instead transcode
180
+ # the data manually after you have decompressed the entire data.
181
+ def decompress_stream(io, memory_limit: LibLZMA::UINT64_MAX, flags: [:tell_unsupported_check], external_encoding: nil, internal_encoding: nil, &block)
182
+ raise(ArgumentError, "Invalid memory limit set!") unless memory_limit > 0 && memory_limit <= LibLZMA::UINT64_MAX
183
+ raise(ArgumentError, "external_encoding must be set if internal_encoding transcoding is requested") if internal_encoding && !external_encoding
184
+
185
+ # The ArgumentError above is only about the concrete arguments
186
+ # (to sync with Ruby's IO API), not about the implied internal
187
+ # encoding, which might still kick in (and does, see below).
188
+ external_encoding ||= Encoding.default_external
189
+ internal_encoding ||= Encoding.default_internal
190
+
191
+ # bit-or all flags
192
+ allflags = flags.inject(0) do |val, flag|
193
+ flag = LibLZMA::LZMA_DECODE_FLAGS[flag] || raise(ArgumentError, "Unknown flag #{flag}!")
194
+ val | flag
195
+ end
196
+
197
+ stream = LibLZMA::LZMAStream.malloc
198
+ LibLZMA.LZMA_STREAM_INIT(stream)
199
+ res = LibLZMA.lzma_stream_decoder(stream.to_ptr,
200
+ memory_limit,
201
+ allflags)
99
202
 
100
203
  LZMAError.raise_if_necessary(res)
101
204
 
102
205
  res = ""
103
206
  res.encode!(Encoding::BINARY)
104
207
  if block_given?
105
- res = lzma_code(io, stream, &block)
208
+ res = lzma_code(io, stream) do |chunk|
209
+ chunk = chunk.dup # Do not write somewhere into the fiddle pointer while encoding (-> can segfault)
210
+ chunk.force_encoding(external_encoding) if external_encoding
211
+ chunk.encode!(internal_encoding) if internal_encoding
212
+ yield(chunk)
213
+ end
106
214
  else
107
215
  lzma_code(io, stream){|chunk| res << chunk}
216
+ res.force_encoding(external_encoding) if external_encoding
217
+ res.encode!(internal_encoding) if internal_encoding
108
218
  end
109
219
 
110
- LibLZMA.lzma_end(stream.pointer)
220
+ LibLZMA.lzma_end(stream.to_ptr)
111
221
 
112
- block_given? ? stream[:total_out] : res
222
+ block_given? ? stream.total_out : res
113
223
  end
114
224
  alias decode_stream decompress_stream
115
225
 
116
- #call-seq:
117
- # compress_stream(io [, compression_level [, check [, extreme ] ] ] ) → a_string
118
- # compress_stream(io [, compression_level [, check [, extreme ] ] ] ){|chunk| ... } → an_integer
119
- # encode_stream(io [, compression_level [, check [, extreme ] ] ] ) → a_string
120
- # encode_stream(io [, compression_level [, check [, extreme ] ] ] ){|chunk| ... } → an_integer
121
- #
122
- #Compresses a stream of data into XZ-compressed data.
123
- #===Parameters
124
- #[io] The IO to read the data from. Must be opened for
125
- # reading.
126
- #[compression_level] (6) Compression strength. Higher values indicate a
127
- # smaller result, but longer compression time. Maximum
128
- # is 9.
129
- #[check] (:crc64) The checksum algorithm to use for verifying
130
- # the data inside the archive. Possible values are:
131
- # * :none
132
- # * :crc32
133
- # * :crc64
134
- # * :sha256
135
- #[extreme] (false) Tries to get the last bit out of the
136
- # compression. This may succeed, but you can end
137
- # up with *very* long computation times.
138
- #[chunk] (Block argument) One piece of compressed data.
139
- #===Return value
140
- #If a block was given, returns the number of bytes written. Otherwise,
141
- #returns the compressed data as a BINARY-encoded string.
142
- #===Example
143
- # data = File.read("file.txt")
144
- # i = StringIO.new(data)
145
- # XZ.compress_stream(i) #=> Some binary blob
146
- # i.rewind
147
- # str = ""
148
- # XZ.compress_stream(i, 4, :sha256){|c| str << c} #=> 123
149
- # str #=> Some binary blob
150
- #===Remarks
151
- #The block form is *much* better on memory usage, because it doesn't have
152
- #to load everything into RAM at once. If you don't know how big your
153
- #data gets or if you want to compress much data, use the block form. Of
154
- #course you shouldn't store the data your read in RAM then as in the
155
- #example above.
156
- def compress_stream(io, compression_level = 6, check = :crc64, extreme = false, &block)
157
- raise(ArgumentError, "Invalid compression level!") unless (0..9).include?(compression_level)
226
+ # call-seq:
227
+ # compress_stream(io [, kw ] ) → a_string
228
+ # compress_stream(io [, kw ] ){|chunk| ... } → an_integer
229
+ # encode_stream(io [, kw ] ) → a_string
230
+ # encode_stream(io [, kw ] ){|chunk| ... } → an_integer
231
+ #
232
+ # Compresses a stream of data into XZ-compressed data.
233
+ #
234
+ # === Parameters
235
+ # ==== Positional arguments
236
+ #
237
+ # [io]
238
+ # The IO to read the data from. Must be opened for
239
+ # reading.
240
+ # [chunk (Block argument)]
241
+ # One piece of compressed data. This is always tagged
242
+ # as a BINARY string, since it's compressed binary data.
243
+ #
244
+ # ==== Keyword arguments
245
+ # All keyword arguments are optional.
246
+ #
247
+ # [level (6)]
248
+ # Compression strength. Higher values indicate a
249
+ # smaller result, but longer compression time. Maximum
250
+ # is 9.
251
+ #
252
+ # [check (:crc64)]
253
+ # The checksum algorithm to use for verifying
254
+ # the data inside the archive. Possible values are:
255
+ # * :none
256
+ # * :crc32
257
+ # * :crc64
258
+ # * :sha256
259
+ #
260
+ # [extreme (false)]
261
+ # Tries to get the last bit out of the
262
+ # compression. This may succeed, but you can end
263
+ # up with *very* long computation times.
264
+ #
265
+ # === Return value
266
+ #
267
+ # If a block was given, returns the number of bytes
268
+ # written. Otherwise, returns the compressed data as a
269
+ # BINARY-encoded string.
270
+ #
271
+ # === Example
272
+ # data = File.read("file.txt")
273
+ # i = StringIO.new(data)
274
+ # XZ.compress_stream(i) #=> Some binary blob
275
+ #
276
+ # i.rewind
277
+ # str = ""
278
+ #
279
+ # XZ.compress_stream(i, level: 4, check: :sha256) do |c|
280
+ # str << c
281
+ # end #=> 123
282
+ # str #=> Some binary blob
283
+ #
284
+ # === Remarks
285
+ #
286
+ # The block form is *much* better on memory usage, because it
287
+ # doesn't have to load everything into RAM at once. If you don't
288
+ # know how big your data gets or if you want to compress much
289
+ # data, use the block form. Of course you shouldn't store the data
290
+ # you read in RAM then as in the example above.
291
+ #
292
+ # For the +io+ object passed Ruby's normal external and internal
293
+ # encoding rules apply while it is read from by this method. These
294
+ # encodings are not changed on +io+ by this method. The data you
295
+ # receive in the block (+chunk+) above is binary data (compressed
296
+ # data) and as such encoded as BINARY.
297
+ def compress_stream(io, level: 6, check: :crc64, extreme: false, &block)
298
+ raise(ArgumentError, "Invalid compression level!") unless (0..9).include?(level)
158
299
  raise(ArgumentError, "Invalid checksum specified!") unless [:none, :crc32, :crc64, :sha256].include?(check)
159
300
 
160
- stream = LZMAStream.new
161
- res = LibLZMA.lzma_easy_encoder(stream.pointer,
162
- compression_level | (extreme ? LibLZMA::LZMA_PRESET_EXTREME : 0),
163
- LibLZMA::LZMA_CHECK[:"lzma_check_#{check}"])
301
+ level |= LibLZMA::LZMA_PRESET_EXTREME if extreme
302
+
303
+ stream = LibLZMA::LZMAStream.malloc
304
+ LibLZMA::LZMA_STREAM_INIT(stream)
305
+ res = LibLZMA.lzma_easy_encoder(stream.to_ptr,
306
+ level,
307
+ LibLZMA.const_get(:"LZMA_CHECK_#{check.upcase}"))
164
308
 
165
309
  LZMAError.raise_if_necessary(res)
166
310
 
@@ -172,90 +316,132 @@ module XZ
172
316
  lzma_code(io, stream){|chunk| res << chunk}
173
317
  end
174
318
 
175
- LibLZMA.lzma_end(stream.pointer)
319
+ LibLZMA.lzma_end(stream.to_ptr)
176
320
 
177
- block_given? ? stream[:total_out] : res
321
+ block_given? ? stream.total_out : res
178
322
  end
179
323
  alias encode_stream compress_stream
180
324
 
181
- #Compresses +in_file+ and writes the result to +out_file+.
182
- #===Parameters
183
- #[in_file] The path to the file to read from.
184
- #[out_file] The path of the file to write to. If it exists, it will be
185
- # overwritten.
186
- #For the other parameters, see the ::compress_stream method.
187
- #===Return value
188
- #The number of bytes written, i.e. the size of the archive.
189
- #===Example
190
- # XZ.compress("myfile.txt", "myfile.txt.xz")
191
- # XZ.compress("myarchive.tar", "myarchive.tar.xz")
192
- #===Remarks
193
- #This method is safe to use with big files, because files are not loaded
194
- #into memory completely at once.
195
- def compress_file(in_file, out_file, compression_level = 6, check = :crc64, extreme = false)
325
+ # Compresses +in_file+ and writes the result to +out_file+.
326
+ #
327
+ # === Parameters
328
+ #
329
+ # [in_file]
330
+ # The path to the file to read from.
331
+ # [out_file]
332
+ # The path of the file to write to. If it exists, it will be
333
+ # overwritten.
334
+ #
335
+ # For the keyword parameters, see the ::compress_stream method.
336
+ #
337
+ # === Return value
338
+ #
339
+ # The number of bytes written, i.e. the size of the archive.
340
+ #
341
+ # === Example
342
+ #
343
+ # XZ.compress_file("myfile.txt", "myfile.txt.xz")
344
+ # XZ.compress_file("myarchive.tar", "myarchive.tar.xz")
345
+ #
346
+ # === Remarks
347
+ #
348
+ # This method is safe to use with big files, because files are not
349
+ # loaded into memory completely at once.
350
+ def compress_file(in_file, out_file, **args)
196
351
  File.open(in_file, "rb") do |i_file|
197
352
  File.open(out_file, "wb") do |o_file|
198
- compress_stream(i_file, compression_level, check, extreme) do |chunk|
353
+ compress_stream(i_file, **args) do |chunk|
199
354
  o_file.write(chunk)
200
355
  end
201
356
  end
202
357
  end
203
358
  end
204
359
 
205
- #Compresses arbitrary data using the XZ algorithm.
206
- #===Parameters
207
- #[str] The data to compress.
208
- #For the other parameters, see the compress_stream method.
209
- #===Return value
210
- #The compressed data as a BINARY-encoded string.
211
- #===Example
212
- # data = "I love Ruby"
213
- # comp = XZ.compress(data) #=> binary blob
214
- #===Remarks
215
- #Don't use this method for big amounts of data--you may run out of
216
- #memory. Use compress_file or compress_stream instead.
217
- def compress(str, compression_level = 6, check = :crc64, extreme = false)
218
- raise(NotImplementedError, "StringIO isn't available!") unless defined? StringIO
360
+ # Compresses arbitrary data using the XZ algorithm.
361
+ #
362
+ # === Parameters
363
+ #
364
+ # [str] The data to compress.
365
+ #
366
+ # For the keyword parameters, see the #compress_stream method.
367
+ #
368
+ # === Return value
369
+ #
370
+ # The compressed data as a BINARY-encoded string.
371
+ #
372
+ # === Example
373
+ #
374
+ # data = "I love Ruby"
375
+ # comp = XZ.compress(data) #=> binary blob
376
+ #
377
+ # === Remarks
378
+ #
379
+ # Don't use this method for big amounts of data--you may run out
380
+ # of memory. Use compress_file or compress_stream instead.
381
+ def compress(str, **args)
219
382
  s = StringIO.new(str)
220
- compress_stream(s, compression_level, check, extreme)
383
+ compress_stream(s, **args)
221
384
  end
222
385
 
223
- #Decompresses data in XZ format.
224
- #===Parameters
225
- #[str] The data to decompress.
226
- #For the other parameters, see the decompress_stream method.
227
- #===Return value
228
- #The decompressed data as a BINARY-encoded string.
229
- #===Example
230
- # comp = File.open("data.xz", "rb"){|f| f.read}
231
- # data = XZ.decompress(comp) #=> "I love Ruby"
232
- #===Remarks
233
- #Don't use this method for big amounts of data--you may run out of
234
- #memory. Use decompress_file or decompress_stream instead.
235
- def decompress(str, memory_limit = LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check])
236
- raise(NotImplementedError, "StringIO isn't available!") unless defined? StringIO
386
+ # Decompresses data in XZ format.
387
+ #
388
+ # === Parameters
389
+ #
390
+ # [str] The data to decompress.
391
+ #
392
+ # For the keyword parameters, see the decompress_stream method.
393
+ #
394
+ # === Return value
395
+ #
396
+ # The decompressed data as a string (see Remarks for its encoding).
397
+ #
398
+ # === Example
399
+ #
400
+ # comp = File.open("data.xz", "rb"){|f| f.read}
401
+ # data = XZ.decompress(comp) #=> "I love Ruby"
402
+ #
403
+ # === Remarks
404
+ #
405
+ # Don't use this method for big amounts of data--you may run out
406
+ # of memory. Use decompress_file or decompress_stream instead.
407
+ #
408
+ # Read #decompress_stream's Remarks section for notes on the
409
+ # return value's encoding.
410
+ def decompress(str, **args)
237
411
  s = StringIO.new(str)
238
- decompress_stream(s, memory_limit, flags)
412
+ decompress_stream(s, **args)
239
413
  end
240
414
 
241
- #Decompresses +in_file+ and writes the result to +out_file+.
242
- #===Parameters
243
- #[in_file] The path to the file to read from.
244
- #[out_file] The path of the file to write to. If it exists, it will
245
- # be overwritten.
246
- #For the other parameters, see the decompress_stream method.
247
- #===Return value
248
- #The number of bytes written, i.e. the size of the uncompressed data.
249
- #===Example
250
- # XZ.decompres("myfile.txt.xz", "myfile.txt")
251
- # XZ.decompress("myarchive.tar.xz", "myarchive.tar")
252
- #===Remarks
253
- #This method is safe to use with big files, because files are not loaded
254
- #into memory completely at once.
255
- def decompress_file(in_file, out_file, memory_limit = LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check])
415
+ # Decompresses +in_file+ and writes the result to +out_file+.
416
+ #
417
+ # === Parameters
418
+ #
419
+ # [in_file]
420
+ # The path to the file to read from.
421
+ # [out_file]
422
+ # The path of the file to write to. If it exists, it will
423
+ # be overwritten.
424
+ #
425
+ # For the keyword parameters, see the decompress_stream method.
426
+ #
427
+ # === Return value
428
+ #
429
+ # The number of bytes written, i.e. the size of the uncompressed
430
+ # data.
431
+ #
432
+ # === Example
433
+ #
434
+ # XZ.decompress_file("myfile.txt.xz", "myfile.txt")
435
+ # XZ.decompress_file("myarchive.tar.xz", "myarchive.tar")
436
+ #
437
+ # === Remarks
438
+ #
439
+ # This method is safe to use with big files, because files are not
440
+ # loaded into memory completely at once.
441
+ def decompress_file(in_file, out_file, **args)
256
442
  File.open(in_file, "rb") do |i_file|
257
443
  File.open(out_file, "wb") do |o_file|
258
- decompress_stream(i_file, memory_limit, flags) do |chunk|
444
+ decompress_stream(i_file, internal_encoding: nil, external_encoding: Encoding::BINARY, **args) do |chunk|
259
445
  o_file.write(chunk)
260
446
  end
261
447
  end
@@ -264,76 +450,68 @@ module XZ
264
450
 
265
451
  private
266
452
 
267
- #This method returns the size of +str+ in bytes.
268
- def binary_size(str)
269
- #Believe it or not, but this is faster than str.bytes.to_a.size.
270
- #I benchmarked it, and it is as twice as fast.
271
- if str.respond_to? :force_encoding
272
- str.dup.force_encoding(Encoding::BINARY).size
273
- else
274
- str.bytes.to_a.size
275
- end
276
- end
277
-
278
- #This method does the heavy work of (de-)compressing a stream. It takes
279
- #an IO object to read data from (that means the IO must be opened
280
- #for reading) and a XZ::LZMAStream object that is used to (de-)compress
281
- #the data. Furthermore this method takes a block which gets passed
282
- #the (de-)compressed data in chunks one at a time--this is needed to allow
283
- #(de-)compressing of very large files that can't be loaded fully into
284
- #memory.
453
+ # This method does the heavy work of (de-)compressing a stream. It
454
+ # takes an IO object to read data from (that means the IO must be
455
+ # opened for reading) and a XZ::LibLZMA::LZMAStream object that is used to
456
+ # (de-)compress the data. Furthermore this method takes a block
457
+ # which gets passed the (de-)compressed data in chunks one at a
458
+ # time--this is needed to allow (de-)compressing of very large
459
+ # files that can't be loaded fully into memory.
285
460
  def lzma_code(io, stream)
286
- input_buffer_p = FFI::MemoryPointer.new(CHUNK_SIZE)
287
- output_buffer_p = FFI::MemoryPointer.new(CHUNK_SIZE)
461
+ input_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
462
+ output_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
288
463
 
289
464
  while str = io.read(CHUNK_SIZE)
290
- input_buffer_p.write_string(str)
291
-
292
- #Set the data for compressing
293
- stream[:next_in] = input_buffer_p
294
- stream[:avail_in] = binary_size(str)
295
-
296
- #Now loop until we gathered all the data in stream[:next_out]. Depending on the
297
- #amount of data, this may not fit into the buffer, meaning that we have to
298
- #provide a pointer to a "new" buffer that liblzma can write into. Since
299
- #liblzma already set stream[:avail_in] to 0 in the first iteration, the extra call to the
300
- #lzma_code() function doesn't hurt (indeed the pipe_comp example from
301
- #liblzma handles it this way too). Sometimes it happens that the compressed data
302
- #is bigger than the original (notably when the amount of data to compress
303
- #is small).
465
+ input_buffer_p[0, str.bytesize] = str
466
+
467
+ # Set the data for compressing
468
+ stream.next_in = input_buffer_p
469
+ stream.avail_in = str.bytesize
470
+
471
+ # Now loop until we gathered all the data in
472
+ # stream.next_out. Depending on the amount of data, this may
473
+ # not fit into the buffer, meaning that we have to provide a
474
+ # pointer to a "new" buffer that liblzma can write into. Since
475
+ # liblzma already set stream.avail_in to 0 in the first
476
+ # iteration, the extra call to the lzma_code() function
477
+ # doesn't hurt (indeed the pipe_comp example from liblzma
478
+ # handles it this way too). Sometimes it happens that the
479
+ # compressed data is bigger than the original (notably when
480
+ # the amount of data to compress is small).
304
481
  loop do
305
- #Prepare for getting the compressed_data
306
- stream[:next_out] = output_buffer_p
307
- stream[:avail_out] = CHUNK_SIZE
482
+ # Prepare for getting the compressed_data
483
+ stream.next_out = output_buffer_p
484
+ stream.avail_out = CHUNK_SIZE
308
485
 
309
- #Compress the data
486
+ # Compress the data
310
487
  res = if io.eof?
311
- LibLZMA.lzma_code(stream.pointer, LibLZMA::LZMA_ACTION[:lzma_finish])
488
+ LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_FINISH)
312
489
  else
313
- LibLZMA.lzma_code(stream.pointer, LibLZMA::LZMA_ACTION[:lzma_run])
490
+ LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_RUN)
314
491
  end
315
492
  check_lzma_code_retval(res)
316
493
 
317
- #Write the compressed data
318
- data = output_buffer_p.read_string(CHUNK_SIZE - stream[:avail_out])
494
+ # Write the compressed data
495
+ # Note: avail_out gives how much space is left after the new data
496
+ data = output_buffer_p[0, CHUNK_SIZE - stream.avail_out]
319
497
  yield(data)
320
498
 
321
- #If the buffer is completely filled, it's likely that there is
322
- #more data liblzma wants to hand to us. Start a new iteration,
323
- #but don't provide new input data.
324
- break unless stream[:avail_out] == 0
499
+ # If the buffer is completely filled, it's likely that there
500
+ # is more data liblzma wants to hand to us. Start a new
501
+ # iteration, but don't provide new input data.
502
+ break unless stream.avail_out == 0
325
503
  end #loop
326
504
  end #while
327
505
  end #lzma_code
328
506
 
329
- #Checks for errors and warnings that can be derived from the return
330
- #value of the lzma_code() function and shows them if necessary.
507
+ # Checks for errors and warnings that can be derived from the
508
+ # return value of the lzma_code() function and shows them if
509
+ # necessary.
331
510
  def check_lzma_code_retval(code)
332
- e = LibLZMA::LZMA_RET
333
511
  case code
334
- when e[:lzma_no_check] then warn("Couldn't verify archive integrity--archive has not integrity checksum.")
335
- when e[:lzma_unsupported_check] then warn("Couldn't verify archive integrity--archive has an unsupported integrity checksum.")
336
- when e[:lzma_get_check] then nil #This isn't useful for us. It indicates that the checksum type is now known.
512
+ when LibLZMA::LZMA_NO_CHECK then warn("Couldn't verify archive integrity--archive has no integrity checksum.")
513
+ when LibLZMA::LZMA_UNSUPPORTED_CHECK then warn("Couldn't verify archive integrity--archive has an unsupported integrity checksum.")
514
+ when LibLZMA::LZMA_GET_CHECK then nil # This isn't useful. It indicates that the checksum type is now known.
337
515
  else
338
516
  LZMAError.raise_if_necessary(code)
339
517
  end
@@ -343,6 +521,8 @@ module XZ
343
521
 
344
522
  end
345
523
 
524
+ require_relative "xz/version"
525
+ require_relative "xz/fiddle_helper"
346
526
  require_relative "xz/lib_lzma"
347
527
  require_relative "xz/stream"
348
528
  require_relative "xz/stream_writer"