ruby-xz 0.2.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/AUTHORS +7 -0
- data/HISTORY.rdoc +84 -7
- data/LICENSE +21 -0
- data/README.md +122 -0
- data/lib/xz/fiddle_helper.rb +91 -0
- data/lib/xz/lib_lzma.rb +134 -110
- data/lib/xz/stream.rb +431 -32
- data/lib/xz/stream_reader.rb +251 -224
- data/lib/xz/stream_writer.rb +208 -158
- data/lib/xz/version.rb +33 -0
- data/lib/xz.rb +412 -232
- metadata +49 -57
- data/COPYING +0 -26
- data/README.rdoc +0 -89
data/lib/xz.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
#
|
2
|
+
#--
|
4
3
|
# Basic liblzma-bindings for Ruby.
|
5
4
|
#
|
6
|
-
# Copyright © 2011
|
7
|
-
#
|
5
|
+
# Copyright © 2011-2018 Marvin Gülker et al.
|
6
|
+
#
|
7
|
+
# See AUTHORS for the full list of contributors.
|
8
8
|
#
|
9
9
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
10
10
|
# copy of this software and associated documentation files (the ‘Software’),
|
@@ -23,144 +23,288 @@
|
|
23
23
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
24
24
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
25
25
|
# THE SOFTWARE.
|
26
|
+
#++
|
26
27
|
|
27
28
|
require "pathname"
|
28
|
-
require "
|
29
|
-
require
|
30
|
-
require "
|
31
|
-
|
32
|
-
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#All strings you receive from any method defined in this module
|
37
|
-
#and the classes defined in it are encoded in BINARY, so you may
|
38
|
-
#have to call #force_encoding on them to tag them with the correct
|
39
|
-
#encoding (assuming you _know_ what their correct encoding should be).
|
40
|
-
#ruby-xz can’t handle this as compiled strings don’t come with encoding
|
41
|
-
#information.
|
29
|
+
require "fiddle"
|
30
|
+
require "fiddle/import"
|
31
|
+
require "stringio"
|
32
|
+
require "forwardable"
|
33
|
+
|
34
|
+
# The namespace and main module of this library. Each method of this
|
35
|
+
# module may raise exceptions of class XZ::LZMAError, which is not
|
36
|
+
# named in the methods' documentations anymore.
|
42
37
|
module XZ
|
43
|
-
#The version of this library.
|
44
|
-
VERSION = "0.2.1"
|
45
38
|
|
46
|
-
#Number of bytes read in one chunk.
|
39
|
+
# Number of bytes read in one chunk.
|
47
40
|
CHUNK_SIZE = 4096
|
48
41
|
|
49
42
|
class << self
|
50
43
|
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
#
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
# has an unsupported checksum type.
|
68
|
-
# [:concatenated] Decompress concatenated archives.
|
69
|
-
#[chunk] (Block argument) One piece of decompressed data.
|
70
|
-
#===Return value
|
71
|
-
#If a block was given, returns the number of bytes written. Otherwise,
|
72
|
-
#returns the decompressed data as a BINARY-encoded string.
|
73
|
-
#===Example
|
74
|
-
# data = File.open("archive.xz", "rb"){|f| f.read}
|
75
|
-
# io = StringIO.new(data)
|
76
|
-
# XZ.decompress_stream(io) #=> "I AM THE DATA"
|
77
|
-
# io.rewind
|
78
|
-
# str = ""
|
79
|
-
# XZ.decompress_stream(io, XZ::LibLZMA::UINT64_MAX, [:tell_no_check]){|c| str << c} #=> 13
|
80
|
-
# str #=> "I AM THE DATA"
|
81
|
-
#===Remarks
|
82
|
-
#The block form is *much* better on memory usage, because it doesn't have
|
83
|
-
#to load everything into RAM at once. If you don't know how big your
|
84
|
-
#data gets or if you want to decompress much data, use the block form. Of
|
85
|
-
#course you shouldn't store the data you read in RAM then as in the
|
86
|
-
#example above.
|
87
|
-
def decompress_stream(io, memory_limit = LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check], &block)
|
88
|
-
raise(ArgumentError, "Invalid memory limit set!") unless (0..LibLZMA::UINT64_MAX).include?(memory_limit)
|
89
|
-
flags.each do |flag|
|
90
|
-
raise(ArgumentError, "Unknown flag #{flag}!") unless [:tell_no_check, :tell_unsupported_check, :tell_any_check, :concatenated].include?(flag)
|
44
|
+
# Force ruby-xz to be silent about deprecations. Using this is
|
45
|
+
# discouraged so that you are aware of upcoming changes to the
|
46
|
+
# API. However, if your standard error stream is closed,
|
47
|
+
# outputting the deprecation notices might result in an exception,
|
48
|
+
# so this method allows you to suppress these notices. Ensure you
|
49
|
+
# read the HISTORY.rdoc file carefully instead.
|
50
|
+
def disable_deprecation_notices=(bool)
|
51
|
+
@disable_deprecation_notices = bool
|
52
|
+
end
|
53
|
+
|
54
|
+
# Output a deprecation notice.
|
55
|
+
def deprecate(msg) # :nodoc:
|
56
|
+
@disable_deprecation_notices ||= false
|
57
|
+
|
58
|
+
unless @disable_deprecation_notices
|
59
|
+
$stderr.puts("DEPRECATION NOTICE: #{msg}\n#{caller.drop(1).join("\n\t")}")
|
91
60
|
end
|
61
|
+
end
|
92
62
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
63
|
+
# call-seq:
|
64
|
+
# decompress_stream(io [, kw ] ) → a_string
|
65
|
+
# decompress_stream(io [, kw ] ){|chunk| ... } → an_integer
|
66
|
+
# decode_stream(io [, kw ] ) → a_string
|
67
|
+
# decode_stream(io [, kw ] ){|chunk| ... } → an_integer
|
68
|
+
#
|
69
|
+
# Decompresses a stream containing XZ-compressed data.
|
70
|
+
#
|
71
|
+
# === Parameters
|
72
|
+
# ==== Positional parameters
|
73
|
+
#
|
74
|
+
# [io]
|
75
|
+
# The IO to read from. It must be opened for reading in
|
76
|
+
# binary mode.
|
77
|
+
# [chunk (Block argument)]
|
78
|
+
# One piece of decompressed data. See Remarks section below
|
79
|
+
# for information about its encoding.
|
80
|
+
#
|
81
|
+
# ==== Keyword arguments
|
82
|
+
#
|
83
|
+
# [memory_limit (+UINT64_MAX+)]
|
84
|
+
# If not XZ::LibLZMA::UINT64_MAX, makes liblzma
|
85
|
+
# use no more memory than +memory_limit+ bytes.
|
86
|
+
#
|
87
|
+
# [flags (<tt>[:tell_unsupported_check]</tt>)]
|
88
|
+
# Additional flags
|
89
|
+
# passed to liblzma (an array). Possible flags are:
|
90
|
+
#
|
91
|
+
# [:tell_no_check]
|
92
|
+
# Spit out a warning if the archive has no
|
93
|
+
# integrity checksum.
|
94
|
+
# [:tell_unsupported_check]
|
95
|
+
# Spit out a warning if the archive
|
96
|
+
# has an unsupported checksum type.
|
97
|
+
# [:concatenated]
|
98
|
+
# Decompress concatenated archives.
|
99
|
+
# [external_encoding (Encoding.default_external)]
|
100
|
+
# Assume the decompressed data inside the compressed data
|
101
|
+
# has this encoding. See Remarks section.
|
102
|
+
# [internal_encoding (Encoding.default_internal)]
|
103
|
+
# Request transcoding of the decompressed data into this
|
104
|
+
# encoding if not nil. Note that Encoding.default_internal
|
105
|
+
# is nil by default. See Remarks section.
|
106
|
+
#
|
107
|
+
# === Return value
|
108
|
+
#
|
109
|
+
# If a block was given, returns the number of bytes
|
110
|
+
# written. Otherwise, returns the decompressed data as a
|
111
|
+
# string encoded as described in the Remarks section.
|
112
|
+
#
|
113
|
+
# === Raises
|
114
|
+
#
|
115
|
+
# [Encoding::InvalidByteSequenceError]
|
116
|
+
# 1. You requested an “internal encoding” conversion
|
117
|
+
# and the archive contains invalid byte sequences
|
118
|
+
# in the external encoding.
|
119
|
+
# 2. You requested an “internal encoding” conversion, used
|
120
|
+
# the block form of this method, and liblzma decided
|
121
|
+
# to cut the decompressed data into chunks in the middle of
|
122
|
+
# a multibyte character. See Remarks section for an
|
123
|
+
# explanation.
|
124
|
+
#
|
125
|
+
# === Example
|
126
|
+
#
|
127
|
+
# data = File.open("archive.xz", "rb"){|f| f.read}
|
128
|
+
# io = StringIO.new(data)
|
129
|
+
#
|
130
|
+
# XZ.decompress_stream(io) #=> "I AM THE DATA"
|
131
|
+
# io.rewind
|
132
|
+
#
|
133
|
+
# str = ""
|
134
|
+
# XZ.decompress_stream(io, memory_limit: XZ::LibLZMA::UINT64_MAX, flags: [:tell_no_check]){|c| str << c} #=> 13
|
135
|
+
# str #=> "I AM THE DATA"
|
136
|
+
#
|
137
|
+
# === Remarks
|
138
|
+
#
|
139
|
+
# The block form is *much* better on memory usage, because it
|
140
|
+
# doesn't have to load everything into RAM at once. If you don't
|
141
|
+
# know how big your data gets or if you want to decompress much
|
142
|
+
# data, use the block form. Of course you shouldn't store the data
|
143
|
+
# you read in RAM then as in the example above.
|
144
|
+
#
|
145
|
+
# This method honours Ruby's external and internal encoding concept.
|
146
|
+
# All documentation about this applies to this method, with the
|
147
|
+
# exception that the external encoding does not refer to the data
|
148
|
+
# on the hard disk (that's compressed XZ data, it's always binary),
|
149
|
+
# but to the data inside the XZ container, i.e. to the *decompressed*
|
150
|
+
# data. Any strings you receive from this method (regardless of
|
151
|
+
# whether via return value or via the +chunk+ block argument) will
|
152
|
+
# first be tagged with the external encoding. If you set an internal
|
153
|
+
# encoding (either via the +internal_encoding+ parameter or via
|
154
|
+
# Ruby's default internal encoding) that string will be transcoded
|
155
|
+
# from the external encoding to the internal encoding before you
|
156
|
+
# even see it; in that case, the return value or chunk block argument
|
157
|
+
# will be encoded in the internal encoding. Internal encoding is
|
158
|
+
# disabled in Ruby by default and the argument for this method also
|
159
|
+
# defaults to nil.
|
160
|
+
#
|
161
|
+
# Due to the external encoding being applied, it can happen that
|
162
|
+
# +chunk+ contains an incomplete multibyte character causing
|
163
|
+
# <tt>valid_encoding?</tt> to return false if called on +chunk+,
|
164
|
+
# because liblzma doesn't know about encodings. The rest of the
|
165
|
+
# character will be yielded to the block in the next iteration
|
166
|
+
# then as liblzma progresses with the decompression from the XZ
|
167
|
+
# format. In other words, be prepared that +chunk+ can contain
|
168
|
+
# incomplete multibyte chars.
|
169
|
+
#
|
170
|
+
# This can have nasty side effects if you requested an internal
|
171
|
+
# encoding automatic transcoding and used the block form. Since
|
172
|
+
# this method applies the internal encoding transcoding before the
|
173
|
+
# chunk is yielded to the block, String#encode gets the incomplete
|
174
|
+
# multibyte character. In that case, you will receive an
|
175
|
+
# Encoding::InvalidByteSequenceError exception even though your
|
176
|
+
# data is perfectly well-formed inside the XZ data. It's just
|
177
|
+
# that liblzma during decompression cut the chunks at an
|
178
|
+
# unfortunate place. To avoid this, do not request internal encoding
|
179
|
+
# conversion when using the block form, but instead transcode
|
180
|
+
# the data manually after you have decompressed the entire data.
|
181
|
+
def decompress_stream(io, memory_limit: LibLZMA::UINT64_MAX, flags: [:tell_unsupported_check], external_encoding: nil, internal_encoding: nil, &block)
|
182
|
+
raise(ArgumentError, "Invalid memory limit set!") unless memory_limit > 0 && memory_limit <= LibLZMA::UINT64_MAX
|
183
|
+
raise(ArgumentError, "external_encoding must be set if internal_encoding transcoding is requested") if internal_encoding && !external_encoding
|
184
|
+
|
185
|
+
# The ArgumentError above is only about the concrete arguments
|
186
|
+
# (to sync with Ruby's IO API), not about the implied internal
|
187
|
+
# encoding, which might still kick in (and does, see below).
|
188
|
+
external_encoding ||= Encoding.default_external
|
189
|
+
internal_encoding ||= Encoding.default_internal
|
190
|
+
|
191
|
+
# bit-or all flags
|
192
|
+
allflags = flags.inject(0) do |val, flag|
|
193
|
+
flag = LibLZMA::LZMA_DECODE_FLAGS[flag] || raise(ArgumentError, "Unknown flag #{flag}!")
|
194
|
+
val | flag
|
195
|
+
end
|
196
|
+
|
197
|
+
stream = LibLZMA::LZMAStream.malloc
|
198
|
+
LibLZMA.LZMA_STREAM_INIT(stream)
|
199
|
+
res = LibLZMA.lzma_stream_decoder(stream.to_ptr,
|
200
|
+
memory_limit,
|
201
|
+
allflags)
|
99
202
|
|
100
203
|
LZMAError.raise_if_necessary(res)
|
101
204
|
|
102
205
|
res = ""
|
103
206
|
res.encode!(Encoding::BINARY)
|
104
207
|
if block_given?
|
105
|
-
res = lzma_code(io, stream
|
208
|
+
res = lzma_code(io, stream) do |chunk|
|
209
|
+
chunk = chunk.dup # Do not write somewhere into the fiddle pointer while encoding (-> can segfault)
|
210
|
+
chunk.force_encoding(external_encoding) if external_encoding
|
211
|
+
chunk.encode!(internal_encoding) if internal_encoding
|
212
|
+
yield(chunk)
|
213
|
+
end
|
106
214
|
else
|
107
215
|
lzma_code(io, stream){|chunk| res << chunk}
|
216
|
+
res.force_encoding(external_encoding) if external_encoding
|
217
|
+
res.encode!(internal_encoding) if internal_encoding
|
108
218
|
end
|
109
219
|
|
110
|
-
LibLZMA.lzma_end(stream.
|
220
|
+
LibLZMA.lzma_end(stream.to_ptr)
|
111
221
|
|
112
|
-
block_given? ? stream
|
222
|
+
block_given? ? stream.total_out : res
|
113
223
|
end
|
114
224
|
alias decode_stream decompress_stream
|
115
225
|
|
116
|
-
#call-seq:
|
117
|
-
#
|
118
|
-
#
|
119
|
-
#
|
120
|
-
#
|
121
|
-
#
|
122
|
-
#Compresses a stream of data into XZ-compressed data.
|
123
|
-
|
124
|
-
#
|
125
|
-
#
|
126
|
-
#
|
127
|
-
#
|
128
|
-
#
|
129
|
-
#
|
130
|
-
#
|
131
|
-
#
|
132
|
-
#
|
133
|
-
#
|
134
|
-
#
|
135
|
-
#
|
136
|
-
#
|
137
|
-
#
|
138
|
-
#
|
139
|
-
|
140
|
-
#
|
141
|
-
#
|
142
|
-
|
143
|
-
#
|
144
|
-
#
|
145
|
-
#
|
146
|
-
#
|
147
|
-
#
|
148
|
-
#
|
149
|
-
#
|
150
|
-
|
151
|
-
#
|
152
|
-
#
|
153
|
-
#
|
154
|
-
#
|
155
|
-
#
|
156
|
-
|
157
|
-
|
226
|
+
# call-seq:
|
227
|
+
# compress_stream(io [, kw ] ) → a_string
|
228
|
+
# compress_stream(io [, kw ] ){|chunk| ... } → an_integer
|
229
|
+
# encode_stream(io [, kw ] ) → a_string
|
230
|
+
# encode_stream(io [, kw ] ){|chunk| ... } → an_integer
|
231
|
+
#
|
232
|
+
# Compresses a stream of data into XZ-compressed data.
|
233
|
+
#
|
234
|
+
# === Parameters
|
235
|
+
# ==== Positional arguments
|
236
|
+
#
|
237
|
+
# [io]
|
238
|
+
# The IO to read the data from. Must be opened for
|
239
|
+
# reading.
|
240
|
+
# [chunk (Block argument)]
|
241
|
+
# One piece of compressed data. This is always tagged
|
242
|
+
# as a BINARY string, since it's compressed binary data.
|
243
|
+
#
|
244
|
+
# ==== Keyword arguments
|
245
|
+
# All keyword arguments are optional.
|
246
|
+
#
|
247
|
+
# [level (6)]
|
248
|
+
# Compression strength. Higher values indicate a
|
249
|
+
# smaller result, but longer compression time. Maximum
|
250
|
+
# is 9.
|
251
|
+
#
|
252
|
+
# [check (:crc64)]
|
253
|
+
# The checksum algorithm to use for verifying
|
254
|
+
# the data inside the archive. Possible values are:
|
255
|
+
# * :none
|
256
|
+
# * :crc32
|
257
|
+
# * :crc64
|
258
|
+
# * :sha256
|
259
|
+
#
|
260
|
+
# [extreme (false)]
|
261
|
+
# Tries to get the last bit out of the
|
262
|
+
# compression. This may succeed, but you can end
|
263
|
+
# up with *very* long computation times.
|
264
|
+
#
|
265
|
+
# === Return value
|
266
|
+
#
|
267
|
+
# If a block was given, returns the number of bytes
|
268
|
+
# written. Otherwise, returns the compressed data as a
|
269
|
+
# BINARY-encoded string.
|
270
|
+
#
|
271
|
+
# === Example
|
272
|
+
# data = File.read("file.txt")
|
273
|
+
# i = StringIO.new(data)
|
274
|
+
# XZ.compress_stream(i) #=> Some binary blob
|
275
|
+
#
|
276
|
+
# i.rewind
|
277
|
+
# str = ""
|
278
|
+
#
|
279
|
+
# XZ.compress_stream(i, level: 4, check: :sha256) do |c|
|
280
|
+
# str << c
|
281
|
+
# end #=> 123
|
282
|
+
# str #=> Some binary blob
|
283
|
+
#
|
284
|
+
# === Remarks
|
285
|
+
#
|
286
|
+
# The block form is *much* better on memory usage, because it
|
287
|
+
# doesn't have to load everything into RAM at once. If you don't
|
288
|
+
# know how big your data gets or if you want to compress much
|
289
|
+
# data, use the block form. Of course you shouldn't store the data
|
290
|
+
# you read in RAM then as in the example above.
|
291
|
+
#
|
292
|
+
# For the +io+ object passed Ruby's normal external and internal
|
293
|
+
# encoding rules apply while it is read from by this method. These
|
294
|
+
# encodings are not changed on +io+ by this method. The data you
|
295
|
+
# receive in the block (+chunk+) above is binary data (compressed
|
296
|
+
# data) and as such encoded as BINARY.
|
297
|
+
def compress_stream(io, level: 6, check: :crc64, extreme: false, &block)
|
298
|
+
raise(ArgumentError, "Invalid compression level!") unless (0..9).include?(level)
|
158
299
|
raise(ArgumentError, "Invalid checksum specified!") unless [:none, :crc32, :crc64, :sha256].include?(check)
|
159
300
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
301
|
+
level |= LibLZMA::LZMA_PRESET_EXTREME if extreme
|
302
|
+
|
303
|
+
stream = LibLZMA::LZMAStream.malloc
|
304
|
+
LibLZMA::LZMA_STREAM_INIT(stream)
|
305
|
+
res = LibLZMA.lzma_easy_encoder(stream.to_ptr,
|
306
|
+
level,
|
307
|
+
LibLZMA.const_get(:"LZMA_CHECK_#{check.upcase}"))
|
164
308
|
|
165
309
|
LZMAError.raise_if_necessary(res)
|
166
310
|
|
@@ -172,90 +316,132 @@ module XZ
|
|
172
316
|
lzma_code(io, stream){|chunk| res << chunk}
|
173
317
|
end
|
174
318
|
|
175
|
-
LibLZMA.lzma_end(stream.
|
319
|
+
LibLZMA.lzma_end(stream.to_ptr)
|
176
320
|
|
177
|
-
block_given? ? stream
|
321
|
+
block_given? ? stream.total_out : res
|
178
322
|
end
|
179
323
|
alias encode_stream compress_stream
|
180
324
|
|
181
|
-
#Compresses +in_file+ and writes the result to +out_file+.
|
182
|
-
|
183
|
-
#
|
184
|
-
#
|
185
|
-
#
|
186
|
-
#
|
187
|
-
|
188
|
-
#The
|
189
|
-
|
190
|
-
#
|
191
|
-
#
|
192
|
-
|
193
|
-
#
|
194
|
-
#
|
195
|
-
|
325
|
+
# Compresses +in_file+ and writes the result to +out_file+.
|
326
|
+
#
|
327
|
+
# === Parameters
|
328
|
+
#
|
329
|
+
# [in_file]
|
330
|
+
# The path to the file to read from.
|
331
|
+
# [out_file]
|
332
|
+
# The path of the file to write to. If it exists, it will be
|
333
|
+
# overwritten.
|
334
|
+
#
|
335
|
+
# For the keyword parameters, see the ::compress_stream method.
|
336
|
+
#
|
337
|
+
# === Return value
|
338
|
+
#
|
339
|
+
# The number of bytes written, i.e. the size of the archive.
|
340
|
+
#
|
341
|
+
# === Example
|
342
|
+
#
|
343
|
+
# XZ.compress_file("myfile.txt", "myfile.txt.xz")
|
344
|
+
# XZ.compress_file("myarchive.tar", "myarchive.tar.xz")
|
345
|
+
#
|
346
|
+
# === Remarks
|
347
|
+
#
|
348
|
+
# This method is safe to use with big files, because files are not
|
349
|
+
# loaded into memory completely at once.
|
350
|
+
def compress_file(in_file, out_file, **args)
|
196
351
|
File.open(in_file, "rb") do |i_file|
|
197
352
|
File.open(out_file, "wb") do |o_file|
|
198
|
-
compress_stream(i_file,
|
353
|
+
compress_stream(i_file, **args) do |chunk|
|
199
354
|
o_file.write(chunk)
|
200
355
|
end
|
201
356
|
end
|
202
357
|
end
|
203
358
|
end
|
204
359
|
|
205
|
-
#Compresses arbitrary data using the XZ algorithm.
|
206
|
-
|
207
|
-
#
|
208
|
-
#
|
209
|
-
|
210
|
-
#
|
211
|
-
|
212
|
-
#
|
213
|
-
#
|
214
|
-
|
215
|
-
#
|
216
|
-
#
|
217
|
-
|
218
|
-
|
360
|
+
# Compresses arbitrary data using the XZ algorithm.
|
361
|
+
#
|
362
|
+
# === Parameters
|
363
|
+
#
|
364
|
+
# [str] The data to compress.
|
365
|
+
#
|
366
|
+
# For the keyword parameters, see the #compress_stream method.
|
367
|
+
#
|
368
|
+
# === Return value
|
369
|
+
#
|
370
|
+
# The compressed data as a BINARY-encoded string.
|
371
|
+
#
|
372
|
+
# === Example
|
373
|
+
#
|
374
|
+
# data = "I love Ruby"
|
375
|
+
# comp = XZ.compress(data) #=> binary blob
|
376
|
+
#
|
377
|
+
# === Remarks
|
378
|
+
#
|
379
|
+
# Don't use this method for big amounts of data--you may run out
|
380
|
+
# of memory. Use compress_file or compress_stream instead.
|
381
|
+
def compress(str, **args)
|
219
382
|
s = StringIO.new(str)
|
220
|
-
compress_stream(s,
|
383
|
+
compress_stream(s, **args)
|
221
384
|
end
|
222
385
|
|
223
|
-
#Decompresses data in XZ format.
|
224
|
-
|
225
|
-
#
|
226
|
-
#
|
227
|
-
|
228
|
-
#
|
229
|
-
|
230
|
-
#
|
231
|
-
#
|
232
|
-
|
233
|
-
#
|
234
|
-
#
|
235
|
-
|
236
|
-
|
386
|
+
# Decompresses data in XZ format.
|
387
|
+
#
|
388
|
+
# === Parameters
|
389
|
+
#
|
390
|
+
# [str] The data to decompress.
|
391
|
+
#
|
392
|
+
# For the keyword parameters, see the decompress_stream method.
|
393
|
+
#
|
394
|
+
# === Return value
|
395
|
+
#
|
396
|
+
# The decompressed data as a string (see Remarks for its encoding).
|
397
|
+
#
|
398
|
+
# === Example
|
399
|
+
#
|
400
|
+
# comp = File.open("data.xz", "rb"){|f| f.read}
|
401
|
+
# data = XZ.decompress(comp) #=> "I love Ruby"
|
402
|
+
#
|
403
|
+
# === Remarks
|
404
|
+
#
|
405
|
+
# Don't use this method for big amounts of data--you may run out
|
406
|
+
# of memory. Use decompress_file or decompress_stream instead.
|
407
|
+
#
|
408
|
+
# Read #decompress_stream's Remarks section for notes on the
|
409
|
+
# return value's encoding.
|
410
|
+
def decompress(str, **args)
|
237
411
|
s = StringIO.new(str)
|
238
|
-
decompress_stream(s,
|
412
|
+
decompress_stream(s, **args)
|
239
413
|
end
|
240
414
|
|
241
|
-
#Decompresses +in_file+ and writes the result to +out_file+.
|
242
|
-
|
243
|
-
#
|
244
|
-
#
|
245
|
-
#
|
246
|
-
#
|
247
|
-
|
248
|
-
#The
|
249
|
-
|
250
|
-
#
|
251
|
-
#
|
252
|
-
|
253
|
-
#
|
254
|
-
#
|
255
|
-
|
415
|
+
# Decompresses +in_file+ and writes the result to +out_file+.
|
416
|
+
#
|
417
|
+
# === Parameters
|
418
|
+
#
|
419
|
+
# [in_file]
|
420
|
+
# The path to the file to read from.
|
421
|
+
# [out_file]
|
422
|
+
# The path of the file to write to. If it exists, it will
|
423
|
+
# be overwritten.
|
424
|
+
#
|
425
|
+
# For the keyword parameters, see the decompress_stream method.
|
426
|
+
#
|
427
|
+
# === Return value
|
428
|
+
#
|
429
|
+
# The number of bytes written, i.e. the size of the uncompressed
|
430
|
+
# data.
|
431
|
+
#
|
432
|
+
# === Example
|
433
|
+
#
|
434
|
+
# XZ.decompress_file("myfile.txt.xz", "myfile.txt")
|
435
|
+
# XZ.decompress_file("myarchive.tar.xz", "myarchive.tar")
|
436
|
+
#
|
437
|
+
# === Remarks
|
438
|
+
#
|
439
|
+
# This method is safe to use with big files, because files are not
|
440
|
+
# loaded into memory completely at once.
|
441
|
+
def decompress_file(in_file, out_file, **args)
|
256
442
|
File.open(in_file, "rb") do |i_file|
|
257
443
|
File.open(out_file, "wb") do |o_file|
|
258
|
-
decompress_stream(i_file,
|
444
|
+
decompress_stream(i_file, internal_encoding: nil, external_encoding: Encoding::BINARY, **args) do |chunk|
|
259
445
|
o_file.write(chunk)
|
260
446
|
end
|
261
447
|
end
|
@@ -264,76 +450,68 @@ module XZ
|
|
264
450
|
|
265
451
|
private
|
266
452
|
|
267
|
-
#This method
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
str.bytes.to_a.size
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
|
-
#This method does the heavy work of (de-)compressing a stream. It takes
|
279
|
-
#an IO object to read data from (that means the IO must be opened
|
280
|
-
#for reading) and a XZ::LZMAStream object that is used to (de-)compress
|
281
|
-
#the data. Furthermore this method takes a block which gets passed
|
282
|
-
#the (de-)compressed data in chunks one at a time--this is needed to allow
|
283
|
-
#(de-)compressing of very large files that can't be loaded fully into
|
284
|
-
#memory.
|
453
|
+
# This method does the heavy work of (de-)compressing a stream. It
|
454
|
+
# takes an IO object to read data from (that means the IO must be
|
455
|
+
# opened for reading) and a XZ::LibLZMA::LZMAStream object that is used to
|
456
|
+
# (de-)compress the data. Furthermore this method takes a block
|
457
|
+
# which gets passed the (de-)compressed data in chunks one at a
|
458
|
+
# time--this is needed to allow (de-)compressing of very large
|
459
|
+
# files that can't be loaded fully into memory.
|
285
460
|
def lzma_code(io, stream)
|
286
|
-
input_buffer_p =
|
287
|
-
output_buffer_p =
|
461
|
+
input_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
|
462
|
+
output_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
|
288
463
|
|
289
464
|
while str = io.read(CHUNK_SIZE)
|
290
|
-
input_buffer_p.
|
291
|
-
|
292
|
-
#Set the data for compressing
|
293
|
-
stream
|
294
|
-
stream
|
295
|
-
|
296
|
-
#Now loop until we gathered all the data in
|
297
|
-
#amount of data, this may
|
298
|
-
#
|
299
|
-
#
|
300
|
-
#
|
301
|
-
#
|
302
|
-
#
|
303
|
-
#
|
465
|
+
input_buffer_p[0, str.bytesize] = str
|
466
|
+
|
467
|
+
# Set the data for compressing
|
468
|
+
stream.next_in = input_buffer_p
|
469
|
+
stream.avail_in = str.bytesize
|
470
|
+
|
471
|
+
# Now loop until we gathered all the data in
|
472
|
+
# stream[:next_out]. Depending on the amount of data, this may
|
473
|
+
# not fit into the buffer, meaning that we have to provide a
|
474
|
+
# pointer to a "new" buffer that liblzma can write into. Since
|
475
|
+
# liblzma already set stream[:avail_in] to 0 in the first
|
476
|
+
# iteration, the extra call to the lzma_code() function
|
477
|
+
# doesn't hurt (indeed the pipe_comp example from liblzma
|
478
|
+
# handles it this way too). Sometimes it happens that the
|
479
|
+
# compressed data is bigger than the original (notably when
|
480
|
+
# the amount of data to compress is small).
|
304
481
|
loop do
|
305
|
-
#Prepare for getting the compressed_data
|
306
|
-
stream
|
307
|
-
stream
|
482
|
+
# Prepare for getting the compressed_data
|
483
|
+
stream.next_out = output_buffer_p
|
484
|
+
stream.avail_out = CHUNK_SIZE
|
308
485
|
|
309
|
-
#Compress the data
|
486
|
+
# Compress the data
|
310
487
|
res = if io.eof?
|
311
|
-
LibLZMA.lzma_code(stream.
|
488
|
+
LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_FINISH)
|
312
489
|
else
|
313
|
-
LibLZMA.lzma_code(stream.
|
490
|
+
LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_RUN)
|
314
491
|
end
|
315
492
|
check_lzma_code_retval(res)
|
316
493
|
|
317
|
-
#Write the compressed data
|
318
|
-
|
494
|
+
# Write the compressed data
|
495
|
+
# Note: avail_out gives how much space is left after the new data
|
496
|
+
data = output_buffer_p[0, CHUNK_SIZE - stream.avail_out]
|
319
497
|
yield(data)
|
320
498
|
|
321
|
-
#If the buffer is completely filled, it's likely that there
|
322
|
-
#more data liblzma wants to hand to us. Start a new
|
323
|
-
#but don't provide new input data.
|
324
|
-
break unless stream
|
499
|
+
# If the buffer is completely filled, it's likely that there
|
500
|
+
# is more data liblzma wants to hand to us. Start a new
|
501
|
+
# iteration, but don't provide new input data.
|
502
|
+
break unless stream.avail_out == 0
|
325
503
|
end #loop
|
326
504
|
end #while
|
327
505
|
end #lzma_code
|
328
506
|
|
329
|
-
#Checks for errors and warnings that can be derived from the
|
330
|
-
#value of the lzma_code() function and shows them if
|
507
|
+
# Checks for errors and warnings that can be derived from the
|
508
|
+
# return value of the lzma_code() function and shows them if
|
509
|
+
# necessary.
|
331
510
|
def check_lzma_code_retval(code)
|
332
|
-
e = LibLZMA::LZMA_RET
|
333
511
|
case code
|
334
|
-
when
|
335
|
-
when
|
336
|
-
when
|
512
|
+
when LibLZMA::LZMA_NO_CHECK then warn("Couldn't verify archive integrity--archive has no integrity checksum.")
|
513
|
+
when LibLZMA::LZMA_UNSUPPORTED_CHECK then warn("Couldn't verify archive integrity--archive has an unsupported integrity checksum.")
|
514
|
+
when LibLZMA::LZMA_GET_CHECK then nil # This isn't useful. It indicates that the checksum type is now known.
|
337
515
|
else
|
338
516
|
LZMAError.raise_if_necessary(code)
|
339
517
|
end
|
@@ -343,6 +521,8 @@ module XZ
|
|
343
521
|
|
344
522
|
end
|
345
523
|
|
524
|
+
require_relative "xz/version"
|
525
|
+
require_relative "xz/fiddle_helper"
|
346
526
|
require_relative "xz/lib_lzma"
|
347
527
|
require_relative "xz/stream"
|
348
528
|
require_relative "xz/stream_writer"
|