ruby-xz 0.2.1 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/xz.rb CHANGED
@@ -1,10 +1,10 @@
1
1
  # -*- coding: utf-8 -*-
2
- # (The MIT License)
3
- #
2
+ #--
4
3
  # Basic liblzma-bindings for Ruby.
5
4
  #
6
- # Copyright © 2011,2012 Marvin Gülker
7
- # Copyright © 2011 Christoph Plank
5
+ # Copyright © 2011-2018 Marvin Gülker et al.
6
+ #
7
+ # See AUTHORS for the full list of contributors.
8
8
  #
9
9
  # Permission is hereby granted, free of charge, to any person obtaining a
10
10
  # copy of this software and associated documentation files (the ‘Software’),
@@ -23,144 +23,288 @@
23
23
  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
24
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
25
  # THE SOFTWARE.
26
+ #++
26
27
 
27
28
  require "pathname"
28
- require "ffi"
29
- require 'stringio'
30
- require "io/like"
31
-
32
- #The namespace and main module of this library. Each method of this module
33
- #may raise exceptions of class XZ::LZMAError, which is not named in the
34
- #methods' documentations anymore.
35
- #
36
- #All strings you receive from any method defined in this module
37
- #and the classes defined in it are encoded in BINARY, so you may
38
- #have to call #force_encoding on them to tag them with the correct
39
- #encoding (assuming you _know_ what their correct encoding should be).
40
- #ruby-xz can’t handle this as compiled strings don’t come with encoding
41
- #information.
29
+ require "fiddle"
30
+ require "fiddle/import"
31
+ require "stringio"
32
+ require "forwardable"
33
+
34
+ # The namespace and main module of this library. Each method of this
35
+ # module may raise exceptions of class XZ::LZMAError, which is not
36
+ # named in the methods' documentations anymore.
42
37
  module XZ
43
- #The version of this library.
44
- VERSION = "0.2.1"
45
38
 
46
- #Number of bytes read in one chunk.
39
+ # Number of bytes read in one chunk.
47
40
  CHUNK_SIZE = 4096
48
41
 
49
42
  class << self
50
43
 
51
- #call-seq:
52
- # decompress_stream(io [, memory_limit [, flags ] ] ) → a_string
53
- # decompress_stream(io [, memory_limit [, flags ] ] ){|chunk| ... } → an_integer
54
- # decode_stream(io [, memory_limit [, flags ] ] ) → a_string
55
- # decode_stream(io [, memory_limit [, flags ] ] ){|chunk| ... } → an_integer
56
- #
57
- #Decompresses a stream containing XZ-compressed data.
58
- #===Parameters
59
- #[io] The IO to read from. It must be opened for reading.
60
- #[memory_limit] (+UINT64_MAX+) If not XZ::LibLZMA::UINT64_MAX, makes liblzma
61
- # use no more memory than +memory_limit+ bytes.
62
- #[flags] (<tt>[:tell_unsupported_check]</tt>) Additional flags
63
- # passed to liblzma (an array). Possible flags are:
64
- # [:tell_no_check] Spit out a warning if the archive hasn't an
65
- # integrity checksum.
66
- # [:tell_unsupported_check] Spit out a warning if the archive
67
- # has an unsupported checksum type.
68
- # [:concatenated] Decompress concatenated archives.
69
- #[chunk] (Block argument) One piece of decompressed data.
70
- #===Return value
71
- #If a block was given, returns the number of bytes written. Otherwise,
72
- #returns the decompressed data as a BINARY-encoded string.
73
- #===Example
74
- # data = File.open("archive.xz", "rb"){|f| f.read}
75
- # io = StringIO.new(data)
76
- # XZ.decompress_stream(io) #=> "I AM THE DATA"
77
- # io.rewind
78
- # str = ""
79
- # XZ.decompress_stream(io, XZ::LibLZMA::UINT64_MAX, [:tell_no_check]){|c| str << c} #=> 13
80
- # str #=> "I AM THE DATA"
81
- #===Remarks
82
- #The block form is *much* better on memory usage, because it doesn't have
83
- #to load everything into RAM at once. If you don't know how big your
84
- #data gets or if you want to decompress much data, use the block form. Of
85
- #course you shouldn't store the data you read in RAM then as in the
86
- #example above.
87
- def decompress_stream(io, memory_limit = LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check], &block)
88
- raise(ArgumentError, "Invalid memory limit set!") unless (0..LibLZMA::UINT64_MAX).include?(memory_limit)
89
- flags.each do |flag|
90
- raise(ArgumentError, "Unknown flag #{flag}!") unless [:tell_no_check, :tell_unsupported_check, :tell_any_check, :concatenated].include?(flag)
44
+ # Force ruby-xz to be silent about deprecations. Using this is
45
+ # discouraged so that you are aware of upcoming changes to the
46
+ # API. However, if your standard error stream is closed,
47
+ # outputting the deprecation notices might result in an exception,
48
+ # so this method allows you to suppress these notices. Ensure you
49
+ # read the HISTORY.rdoc file carefully instead.
50
+ def disable_deprecation_notices=(bool)
51
+ @disable_deprecation_notices = bool
52
+ end
53
+
54
+ # Output a deprecation notice.
55
+ def deprecate(msg) # :nodoc:
56
+ @disable_deprecation_notices ||= false
57
+
58
+ unless @disable_deprecation_notices
59
+ $stderr.puts("DEPRECATION NOTICE: #{msg}\n#{caller.drop(1).join("\n\t")}")
91
60
  end
61
+ end
92
62
 
93
- stream = LZMAStream.new
94
- res = LibLZMA.lzma_stream_decoder(
95
- stream.pointer,
96
- memory_limit,
97
- flags.inject(0){|val, flag| val | LibLZMA.const_get(:"LZMA_#{flag.to_s.upcase}")}
98
- )
63
+ # call-seq:
64
+ # decompress_stream(io [, kw ] ) → a_string
65
+ # decompress_stream(io [, kw ] ){|chunk| ... } → an_integer
66
+ # decode_stream(io [, kw ] ) → a_string
67
+ # decode_stream(io [, kw ] ){|chunk| ... } → an_integer
68
+ #
69
+ # Decompresses a stream containing XZ-compressed data.
70
+ #
71
+ # === Parameters
72
+ # ==== Positional parameters
73
+ #
74
+ # [io]
75
+ # The IO to read from. It must be opened for reading in
76
+ # binary mode.
77
+ # [chunk (Block argument)]
78
+ # One piece of decompressed data. See Remarks section below
79
+ # for information about its encoding.
80
+ #
81
+ # ==== Keyword arguments
82
+ #
83
+ # [memory_limit (+UINT64_MAX+)]
84
+ # If not XZ::LibLZMA::UINT64_MAX, makes liblzma
85
+ # use no more memory than +memory_limit+ bytes.
86
+ #
87
+ # [flags (<tt>[:tell_unsupported_check]</tt>)]
88
+ # Additional flags
89
+ # passed to liblzma (an array). Possible flags are:
90
+ #
91
+ # [:tell_no_check]
92
+ # Spit out a warning if the archive has no
93
+ # integrity checksum.
94
+ # [:tell_unsupported_check]
95
+ # Spit out a warning if the archive
96
+ # has an unsupported checksum type.
97
+ # [:concatenated]
98
+ # Decompress concatenated archives.
99
+ # [external_encoding (Encoding.default_external)]
100
+ # Assume the decompressed data inside the compressed data
101
+ # has this encoding. See Remarks section.
102
+ # [internal_encoding (Encoding.default_internal)]
103
+ # Request transcoding of the decompressed data into this
104
+ # encoding if not nil. Note that Encoding.default_internal
105
+ # is nil by default. See Remarks section.
106
+ #
107
+ # === Return value
108
+ #
109
+ # If a block was given, returns the number of bytes
110
+ # written. Otherwise, returns the decompressed data as a string
111
+ # tagged with the external encoding (see Remarks section).
112
+ #
113
+ # === Raises
114
+ #
115
+ # [Encoding::InvalidByteSequenceError]
116
+ # 1. You requested an “internal encoding” conversion
117
+ # and the archive contains invalid byte sequences
118
+ # in the external encoding.
119
+ # 2. You requested an “internal encoding” conversion, used
120
+ # the block form of this method, and liblzma decided
121
+ # to cut the decompressed data into chunks in the middle of
122
+ # a multibyte character. See Remarks section for an
123
+ # explanation.
124
+ #
125
+ # === Example
126
+ #
127
+ # data = File.open("archive.xz", "rb"){|f| f.read}
128
+ # io = StringIO.new(data)
129
+ #
130
+ # XZ.decompress_stream(io) #=> "I AM THE DATA"
131
+ # io.rewind
132
+ #
133
+ # str = ""
134
+ # XZ.decompress_stream(io, memory_limit: XZ::LibLZMA::UINT64_MAX, flags: [:tell_no_check]){|c| str << c} #=> 13
135
+ # str #=> "I AM THE DATA"
136
+ #
137
+ # === Remarks
138
+ #
139
+ # The block form is *much* better on memory usage, because it
140
+ # doesn't have to load everything into RAM at once. If you don't
141
+ # know how big your data gets or if you want to decompress much
142
+ # data, use the block form. Of course you shouldn't store the data
143
+ # you read in RAM then as in the example above.
144
+ #
145
+ # This method honours Ruby's external and internal encoding concept.
146
+ # All documentation about this applies to this method, with the
147
+ # exception that the external encoding does not refer to the data
148
+ # on the hard disk (that's compressed XZ data, it's always binary),
149
+ # but to the data inside the XZ container, i.e. to the *decompressed*
150
+ # data. Any strings you receive from this method (regardless of
151
+ # whether via return value or via the +chunk+ block argument) will
152
+ # first be tagged with the external encoding. If you set an internal
153
+ # encoding (either via the +internal_encoding+ parameter or via
154
+ # Ruby's default internal encoding) that string will be transcoded
155
+ # from the external encoding to the internal encoding before you
156
+ # even see it; in that case, the return value or chunk block argument
157
+ # will be encoded in the internal encoding. Internal encoding is
158
+ # disabled in Ruby by default and the argument for this method also
159
+ # defaults to nil.
160
+ #
161
+ # Due to the external encoding being applied, it can happen that
162
+ # +chunk+ contains an incomplete multibyte character causing
163
+ # <tt>valid_encoding?</tt> to return false if called on +chunk+,
164
+ # because liblzma doesn't know about encodings. The rest of the
165
+ # character will be yielded to the block in the next iteration
166
+ # then as liblzma progresses with the decompression from the XZ
167
+ # format. In other words, be prepared that +chunk+ can contain
168
+ # incomplete multibyte chars.
169
+ #
170
+ # This can have nasty side effects if you requested an internal
171
+ # encoding automatic transcoding and used the block form. Since
172
+ # this method applies the internal encoding transcoding before the
173
+ # chunk is yielded to the block, String#encode gets the incomplete
174
+ # multibyte character. In that case, you will receive an
175
+ # Encoding::InvalidByteSequenceError exception even though your
176
+ # data is perfectly well-formed inside the XZ data. It's just
177
+ # that liblzma during decompression cut the chunks at an
178
+ # unfortunate place. To avoid this, do not request internal encoding
179
+ # conversion when using the block form, but instead transcode
180
+ # the data manually after you have decompressed the entire data.
181
+ def decompress_stream(io, memory_limit: LibLZMA::UINT64_MAX, flags: [:tell_unsupported_check], external_encoding: nil, internal_encoding: nil, &block)
182
+ raise(ArgumentError, "Invalid memory limit set!") unless memory_limit > 0 && memory_limit <= LibLZMA::UINT64_MAX
183
+ raise(ArgumentError, "external_encoding must be set if internal_encoding transcoding is requested") if internal_encoding && !external_encoding
184
+
185
+ # The ArgumentError above is only about the concrete arguments
186
+ # (to sync with Ruby's IO API), not about the implied internal
187
+ # encoding, which might still kick in (and does, see below).
188
+ external_encoding ||= Encoding.default_external
189
+ internal_encoding ||= Encoding.default_internal
190
+
191
+ # bit-or all flags
192
+ allflags = flags.inject(0) do |val, flag|
193
+ flag = LibLZMA::LZMA_DECODE_FLAGS[flag] || raise(ArgumentError, "Unknown flag #{flag}!")
194
+ val | flag
195
+ end
196
+
197
+ stream = LibLZMA::LZMAStream.malloc
198
+ LibLZMA.LZMA_STREAM_INIT(stream)
199
+ res = LibLZMA.lzma_stream_decoder(stream.to_ptr,
200
+ memory_limit,
201
+ allflags)
99
202
 
100
203
  LZMAError.raise_if_necessary(res)
101
204
 
102
205
  res = ""
103
206
  res.encode!(Encoding::BINARY)
104
207
  if block_given?
105
- res = lzma_code(io, stream, &block)
208
+ res = lzma_code(io, stream) do |chunk|
209
+ chunk = chunk.dup # Do not write somewhere into the fiddle pointer while encoding (-> can segfault)
210
+ chunk.force_encoding(external_encoding) if external_encoding
211
+ chunk.encode!(internal_encoding) if internal_encoding
212
+ yield(chunk)
213
+ end
106
214
  else
107
215
  lzma_code(io, stream){|chunk| res << chunk}
216
+ res.force_encoding(external_encoding) if external_encoding
217
+ res.encode!(internal_encoding) if internal_encoding
108
218
  end
109
219
 
110
- LibLZMA.lzma_end(stream.pointer)
220
+ LibLZMA.lzma_end(stream.to_ptr)
111
221
 
112
- block_given? ? stream[:total_out] : res
222
+ block_given? ? stream.total_out : res
113
223
  end
114
224
  alias decode_stream decompress_stream
115
225
 
116
- #call-seq:
117
- # compress_stream(io [, compression_level [, check [, extreme ] ] ] ) → a_string
118
- # compress_stream(io [, compression_level [, check [, extreme ] ] ] ){|chunk| ... } → an_integer
119
- # encode_stream(io [, compression_level [, check [, extreme ] ] ] ) → a_string
120
- # encode_stream(io [, compression_level [, check [, extreme ] ] ] ){|chunk| ... } → an_integer
121
- #
122
- #Compresses a stream of data into XZ-compressed data.
123
- #===Parameters
124
- #[io] The IO to read the data from. Must be opened for
125
- # reading.
126
- #[compression_level] (6) Compression strength. Higher values indicate a
127
- # smaller result, but longer compression time. Maximum
128
- # is 9.
129
- #[check] (:crc64) The checksum algorithm to use for verifying
130
- # the data inside the archive. Possible values are:
131
- # * :none
132
- # * :crc32
133
- # * :crc64
134
- # * :sha256
135
- #[extreme] (false) Tries to get the last bit out of the
136
- # compression. This may succeed, but you can end
137
- # up with *very* long computation times.
138
- #[chunk] (Block argument) One piece of compressed data.
139
- #===Return value
140
- #If a block was given, returns the number of bytes written. Otherwise,
141
- #returns the compressed data as a BINARY-encoded string.
142
- #===Example
143
- # data = File.read("file.txt")
144
- # i = StringIO.new(data)
145
- # XZ.compress_stream(i) #=> Some binary blob
146
- # i.rewind
147
- # str = ""
148
- # XZ.compress_stream(i, 4, :sha256){|c| str << c} #=> 123
149
- # str #=> Some binary blob
150
- #===Remarks
151
- #The block form is *much* better on memory usage, because it doesn't have
152
- #to load everything into RAM at once. If you don't know how big your
153
- #data gets or if you want to compress much data, use the block form. Of
154
- #course you shouldn't store the data your read in RAM then as in the
155
- #example above.
156
- def compress_stream(io, compression_level = 6, check = :crc64, extreme = false, &block)
157
- raise(ArgumentError, "Invalid compression level!") unless (0..9).include?(compression_level)
226
+ # call-seq:
227
+ # compress_stream(io [, kw ] ) → a_string
228
+ # compress_stream(io [, kw ] ){|chunk| ... } → an_integer
229
+ # encode_stream(io [, kw ] ) → a_string
230
+ # encode_stream(io [, kw ] ){|chunk| ... } → an_integer
231
+ #
232
+ # Compresses a stream of data into XZ-compressed data.
233
+ #
234
+ # === Parameters
235
+ # ==== Positional arguments
236
+ #
237
+ # [io]
238
+ # The IO to read the data from. Must be opened for
239
+ # reading.
240
+ # [chunk (Block argument)]
241
+ # One piece of compressed data. This is always tagged
242
+ # as a BINARY string, since it's compressed binary data.
243
+ #
244
+ # ==== Keyword arguments
245
+ # All keyword arguments are optional.
246
+ #
247
+ # [level (6)]
248
+ # Compression strength. Higher values indicate a
249
+ # smaller result, but longer compression time. Maximum
250
+ # is 9.
251
+ #
252
+ # [check (:crc64)]
253
+ # The checksum algorithm to use for verifying
254
+ # the data inside the archive. Possible values are:
255
+ # * :none
256
+ # * :crc32
257
+ # * :crc64
258
+ # * :sha256
259
+ #
260
+ # [extreme (false)]
261
+ # Tries to get the last bit out of the
262
+ # compression. This may succeed, but you can end
263
+ # up with *very* long computation times.
264
+ #
265
+ # === Return value
266
+ #
267
+ # If a block was given, returns the number of bytes
268
+ # written. Otherwise, returns the compressed data as a
269
+ # BINARY-encoded string.
270
+ #
271
+ # === Example
272
+ # data = File.read("file.txt")
273
+ # i = StringIO.new(data)
274
+ # XZ.compress_stream(i) #=> Some binary blob
275
+ #
276
+ # i.rewind
277
+ # str = ""
278
+ #
279
+ # XZ.compress_stream(i, level: 4, check: :sha256) do |c|
280
+ # str << c
281
+ # end #=> 123
282
+ # str #=> Some binary blob
283
+ #
284
+ # === Remarks
285
+ #
286
+ # The block form is *much* better on memory usage, because it
287
+ # doesn't have to load everything into RAM at once. If you don't
288
+ # know how big your data gets or if you want to compress much
289
+ # data, use the block form. Of course you shouldn't store the data
290
+ # you read in RAM then as in the example above.
291
+ #
292
+ # For the +io+ object passed Ruby's normal external and internal
293
+ # encoding rules apply while it is read from by this method. These
294
+ # encodings are not changed on +io+ by this method. The data you
295
+ # receive in the block (+chunk+) above is binary data (compressed
296
+ # data) and as such encoded as BINARY.
297
+ def compress_stream(io, level: 6, check: :crc64, extreme: false, &block)
298
+ raise(ArgumentError, "Invalid compression level!") unless (0..9).include?(level)
158
299
  raise(ArgumentError, "Invalid checksum specified!") unless [:none, :crc32, :crc64, :sha256].include?(check)
159
300
 
160
- stream = LZMAStream.new
161
- res = LibLZMA.lzma_easy_encoder(stream.pointer,
162
- compression_level | (extreme ? LibLZMA::LZMA_PRESET_EXTREME : 0),
163
- LibLZMA::LZMA_CHECK[:"lzma_check_#{check}"])
301
+ level |= LibLZMA::LZMA_PRESET_EXTREME if extreme
302
+
303
+ stream = LibLZMA::LZMAStream.malloc
304
+ LibLZMA::LZMA_STREAM_INIT(stream)
305
+ res = LibLZMA.lzma_easy_encoder(stream.to_ptr,
306
+ level,
307
+ LibLZMA.const_get(:"LZMA_CHECK_#{check.upcase}"))
164
308
 
165
309
  LZMAError.raise_if_necessary(res)
166
310
 
@@ -172,90 +316,132 @@ module XZ
172
316
  lzma_code(io, stream){|chunk| res << chunk}
173
317
  end
174
318
 
175
- LibLZMA.lzma_end(stream.pointer)
319
+ LibLZMA.lzma_end(stream.to_ptr)
176
320
 
177
- block_given? ? stream[:total_out] : res
321
+ block_given? ? stream.total_out : res
178
322
  end
179
323
  alias encode_stream compress_stream
180
324
 
181
- #Compresses +in_file+ and writes the result to +out_file+.
182
- #===Parameters
183
- #[in_file] The path to the file to read from.
184
- #[out_file] The path of the file to write to. If it exists, it will be
185
- # overwritten.
186
- #For the other parameters, see the ::compress_stream method.
187
- #===Return value
188
- #The number of bytes written, i.e. the size of the archive.
189
- #===Example
190
- # XZ.compress("myfile.txt", "myfile.txt.xz")
191
- # XZ.compress("myarchive.tar", "myarchive.tar.xz")
192
- #===Remarks
193
- #This method is safe to use with big files, because files are not loaded
194
- #into memory completely at once.
195
- def compress_file(in_file, out_file, compression_level = 6, check = :crc64, extreme = false)
325
+ # Compresses +in_file+ and writes the result to +out_file+.
326
+ #
327
+ # === Parameters
328
+ #
329
+ # [in_file]
330
+ # The path to the file to read from.
331
+ # [out_file]
332
+ # The path of the file to write to. If it exists, it will be
333
+ # overwritten.
334
+ #
335
+ # For the keyword parameters, see the ::compress_stream method.
336
+ #
337
+ # === Return value
338
+ #
339
+ # The number of bytes written, i.e. the size of the archive.
340
+ #
341
+ # === Example
342
+ #
343
+ # XZ.compress_file("myfile.txt", "myfile.txt.xz")
344
+ # XZ.compress_file("myarchive.tar", "myarchive.tar.xz")
345
+ #
346
+ # === Remarks
347
+ #
348
+ # This method is safe to use with big files, because files are not
349
+ # loaded into memory completely at once.
350
+ def compress_file(in_file, out_file, **args)
196
351
  File.open(in_file, "rb") do |i_file|
197
352
  File.open(out_file, "wb") do |o_file|
198
- compress_stream(i_file, compression_level, check, extreme) do |chunk|
353
+ compress_stream(i_file, **args) do |chunk|
199
354
  o_file.write(chunk)
200
355
  end
201
356
  end
202
357
  end
203
358
  end
204
359
 
205
- #Compresses arbitrary data using the XZ algorithm.
206
- #===Parameters
207
- #[str] The data to compress.
208
- #For the other parameters, see the compress_stream method.
209
- #===Return value
210
- #The compressed data as a BINARY-encoded string.
211
- #===Example
212
- # data = "I love Ruby"
213
- # comp = XZ.compress(data) #=> binary blob
214
- #===Remarks
215
- #Don't use this method for big amounts of data--you may run out of
216
- #memory. Use compress_file or compress_stream instead.
217
- def compress(str, compression_level = 6, check = :crc64, extreme = false)
218
- raise(NotImplementedError, "StringIO isn't available!") unless defined? StringIO
360
+ # Compresses arbitrary data using the XZ algorithm.
361
+ #
362
+ # === Parameters
363
+ #
364
+ # [str] The data to compress.
365
+ #
366
+ # For the keyword parameters, see the #compress_stream method.
367
+ #
368
+ # === Return value
369
+ #
370
+ # The compressed data as a BINARY-encoded string.
371
+ #
372
+ # === Example
373
+ #
374
+ # data = "I love Ruby"
375
+ # comp = XZ.compress(data) #=> binary blob
376
+ #
377
+ # === Remarks
378
+ #
379
+ # Don't use this method for big amounts of data--you may run out
380
+ # of memory. Use compress_file or compress_stream instead.
381
+ def compress(str, **args)
219
382
  s = StringIO.new(str)
220
- compress_stream(s, compression_level, check, extreme)
383
+ compress_stream(s, **args)
221
384
  end
222
385
 
223
- #Decompresses data in XZ format.
224
- #===Parameters
225
- #[str] The data to decompress.
226
- #For the other parameters, see the decompress_stream method.
227
- #===Return value
228
- #The decompressed data as a BINARY-encoded string.
229
- #===Example
230
- # comp = File.open("data.xz", "rb"){|f| f.read}
231
- # data = XZ.decompress(comp) #=> "I love Ruby"
232
- #===Remarks
233
- #Don't use this method for big amounts of data--you may run out of
234
- #memory. Use decompress_file or decompress_stream instead.
235
- def decompress(str, memory_limit = LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check])
236
- raise(NotImplementedError, "StringIO isn't available!") unless defined? StringIO
386
+ # Decompresses data in XZ format.
387
+ #
388
+ # === Parameters
389
+ #
390
+ # [str] The data to decompress.
391
+ #
392
+ # For the keyword parameters, see the decompress_stream method.
393
+ #
394
+ # === Return value
395
+ #
396
+ # The decompressed data as a BINARY-encoded string.
397
+ #
398
+ # === Example
399
+ #
400
+ # comp = File.open("data.xz", "rb"){|f| f.read}
401
+ # data = XZ.decompress(comp) #=> "I love Ruby"
402
+ #
403
+ # === Remarks
404
+ #
405
+ # Don't use this method for big amounts of data--you may run out
406
+ # of memory. Use decompress_file or decompress_stream instead.
407
+ #
408
+ # Read #decompress_stream's Remarks section for notes on the
409
+ # return value's encoding.
410
+ def decompress(str, **args)
237
411
  s = StringIO.new(str)
238
- decompress_stream(s, memory_limit, flags)
412
+ decompress_stream(s, **args)
239
413
  end
240
414
 
241
- #Decompresses +in_file+ and writes the result to +out_file+.
242
- #===Parameters
243
- #[in_file] The path to the file to read from.
244
- #[out_file] The path of the file to write to. If it exists, it will
245
- # be overwritten.
246
- #For the other parameters, see the decompress_stream method.
247
- #===Return value
248
- #The number of bytes written, i.e. the size of the uncompressed data.
249
- #===Example
250
- # XZ.decompres("myfile.txt.xz", "myfile.txt")
251
- # XZ.decompress("myarchive.tar.xz", "myarchive.tar")
252
- #===Remarks
253
- #This method is safe to use with big files, because files are not loaded
254
- #into memory completely at once.
255
- def decompress_file(in_file, out_file, memory_limit = LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check])
415
+ # Decompresses +in_file+ and writes the result to +out_file+.
416
+ #
417
+ # === Parameters
418
+ #
419
+ # [in_file]
420
+ # The path to the file to read from.
421
+ # [out_file]
422
+ # The path of the file to write to. If it exists, it will
423
+ # be overwritten.
424
+ #
425
+ # For the keyword parameters, see the decompress_stream method.
426
+ #
427
+ # === Return value
428
+ #
429
+ # The number of bytes written, i.e. the size of the uncompressed
430
+ # data.
431
+ #
432
+ # === Example
433
+ #
434
+ # XZ.decompress_file("myfile.txt.xz", "myfile.txt")
435
+ # XZ.decompress_file("myarchive.tar.xz", "myarchive.tar")
436
+ #
437
+ # === Remarks
438
+ #
439
+ # This method is safe to use with big files, because files are not
440
+ # loaded into memory completely at once.
441
+ def decompress_file(in_file, out_file, **args)
256
442
  File.open(in_file, "rb") do |i_file|
257
443
  File.open(out_file, "wb") do |o_file|
258
- decompress_stream(i_file, memory_limit, flags) do |chunk|
444
+ decompress_stream(i_file, internal_encoding: nil, external_encoding: Encoding::BINARY, **args) do |chunk|
259
445
  o_file.write(chunk)
260
446
  end
261
447
  end
@@ -264,76 +450,68 @@ module XZ
264
450
 
265
451
  private
266
452
 
267
- #This method returns the size of +str+ in bytes.
268
- def binary_size(str)
269
- #Believe it or not, but this is faster than str.bytes.to_a.size.
270
- #I benchmarked it, and it is as twice as fast.
271
- if str.respond_to? :force_encoding
272
- str.dup.force_encoding(Encoding::BINARY).size
273
- else
274
- str.bytes.to_a.size
275
- end
276
- end
277
-
278
- #This method does the heavy work of (de-)compressing a stream. It takes
279
- #an IO object to read data from (that means the IO must be opened
280
- #for reading) and a XZ::LZMAStream object that is used to (de-)compress
281
- #the data. Furthermore this method takes a block which gets passed
282
- #the (de-)compressed data in chunks one at a time--this is needed to allow
283
- #(de-)compressing of very large files that can't be loaded fully into
284
- #memory.
453
+ # This method does the heavy work of (de-)compressing a stream. It
454
+ # takes an IO object to read data from (that means the IO must be
455
+ # opened for reading) and a XZ::LibLZMA::LZMAStream object that is used to
456
+ # (de-)compress the data. Furthermore this method takes a block
457
+ # which gets passed the (de-)compressed data in chunks one at a
458
+ # time--this is needed to allow (de-)compressing of very large
459
+ # files that can't be loaded fully into memory.
285
460
  def lzma_code(io, stream)
286
- input_buffer_p = FFI::MemoryPointer.new(CHUNK_SIZE)
287
- output_buffer_p = FFI::MemoryPointer.new(CHUNK_SIZE)
461
+ input_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
462
+ output_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
288
463
 
289
464
  while str = io.read(CHUNK_SIZE)
290
- input_buffer_p.write_string(str)
291
-
292
- #Set the data for compressing
293
- stream[:next_in] = input_buffer_p
294
- stream[:avail_in] = binary_size(str)
295
-
296
- #Now loop until we gathered all the data in stream[:next_out]. Depending on the
297
- #amount of data, this may not fit into the buffer, meaning that we have to
298
- #provide a pointer to a "new" buffer that liblzma can write into. Since
299
- #liblzma already set stream[:avail_in] to 0 in the first iteration, the extra call to the
300
- #lzma_code() function doesn't hurt (indeed the pipe_comp example from
301
- #liblzma handles it this way too). Sometimes it happens that the compressed data
302
- #is bigger than the original (notably when the amount of data to compress
303
- #is small).
465
+ input_buffer_p[0, str.bytesize] = str
466
+
467
+ # Set the data for compressing
468
+ stream.next_in = input_buffer_p
469
+ stream.avail_in = str.bytesize
470
+
471
+ # Now loop until we gathered all the data in
472
+ # stream.next_out. Depending on the amount of data, this may
473
+ # not fit into the buffer, meaning that we have to provide a
474
+ # pointer to a "new" buffer that liblzma can write into. Since
475
+ # liblzma already set stream.avail_in to 0 in the first
476
+ # iteration, the extra call to the lzma_code() function
477
+ # doesn't hurt (indeed the pipe_comp example from liblzma
478
+ # handles it this way too). Sometimes it happens that the
479
+ # compressed data is bigger than the original (notably when
480
+ # the amount of data to compress is small).
304
481
  loop do
305
- #Prepare for getting the compressed_data
306
- stream[:next_out] = output_buffer_p
307
- stream[:avail_out] = CHUNK_SIZE
482
+ # Prepare for getting the compressed_data
483
+ stream.next_out = output_buffer_p
484
+ stream.avail_out = CHUNK_SIZE
308
485
 
309
- #Compress the data
486
+ # Compress the data
310
487
  res = if io.eof?
311
- LibLZMA.lzma_code(stream.pointer, LibLZMA::LZMA_ACTION[:lzma_finish])
488
+ LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_FINISH)
312
489
  else
313
- LibLZMA.lzma_code(stream.pointer, LibLZMA::LZMA_ACTION[:lzma_run])
490
+ LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_RUN)
314
491
  end
315
492
  check_lzma_code_retval(res)
316
493
 
317
- #Write the compressed data
318
- data = output_buffer_p.read_string(CHUNK_SIZE - stream[:avail_out])
494
+ # Write the compressed data
495
+ # Note: avail_out gives how much space is left after the new data
496
+ data = output_buffer_p[0, CHUNK_SIZE - stream.avail_out]
319
497
  yield(data)
320
498
 
321
- #If the buffer is completely filled, it's likely that there is
322
- #more data liblzma wants to hand to us. Start a new iteration,
323
- #but don't provide new input data.
324
- break unless stream[:avail_out] == 0
499
+ # If the buffer is completely filled, it's likely that there
500
+ # is more data liblzma wants to hand to us. Start a new
501
+ # iteration, but don't provide new input data.
502
+ break unless stream.avail_out == 0
325
503
  end #loop
326
504
  end #while
327
505
  end #lzma_code
328
506
 
329
- #Checks for errors and warnings that can be derived from the return
330
- #value of the lzma_code() function and shows them if necessary.
507
+ # Checks for errors and warnings that can be derived from the
508
+ # return value of the lzma_code() function and shows them if
509
+ # necessary.
331
510
  def check_lzma_code_retval(code)
332
- e = LibLZMA::LZMA_RET
333
511
  case code
334
- when e[:lzma_no_check] then warn("Couldn't verify archive integrity--archive has not integrity checksum.")
335
- when e[:lzma_unsupported_check] then warn("Couldn't verify archive integrity--archive has an unsupported integrity checksum.")
336
- when e[:lzma_get_check] then nil #This isn't useful for us. It indicates that the checksum type is now known.
512
+ when LibLZMA::LZMA_NO_CHECK then warn("Couldn't verify archive integrity--archive has no integrity checksum.")
513
+ when LibLZMA::LZMA_UNSUPPORTED_CHECK then warn("Couldn't verify archive integrity--archive has an unsupported integrity checksum.")
514
+ when LibLZMA::LZMA_GET_CHECK then nil # This isn't useful. It indicates that the checksum type is now known.
337
515
  else
338
516
  LZMAError.raise_if_necessary(code)
339
517
  end
@@ -343,6 +521,8 @@ module XZ
343
521
 
344
522
  end
345
523
 
524
+ require_relative "xz/version"
525
+ require_relative "xz/fiddle_helper"
346
526
  require_relative "xz/lib_lzma"
347
527
  require_relative "xz/stream"
348
528
  require_relative "xz/stream_writer"