ruby-xz 0.2.1 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/AUTHORS +7 -0
- data/HISTORY.rdoc +84 -7
- data/LICENSE +21 -0
- data/README.md +122 -0
- data/lib/xz/fiddle_helper.rb +91 -0
- data/lib/xz/lib_lzma.rb +134 -110
- data/lib/xz/stream.rb +431 -32
- data/lib/xz/stream_reader.rb +251 -224
- data/lib/xz/stream_writer.rb +208 -158
- data/lib/xz/version.rb +33 -0
- data/lib/xz.rb +412 -232
- metadata +49 -57
- data/COPYING +0 -26
- data/README.rdoc +0 -89
data/lib/xz.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
#
|
2
|
+
#--
|
4
3
|
# Basic liblzma-bindings for Ruby.
|
5
4
|
#
|
6
|
-
# Copyright © 2011
|
7
|
-
#
|
5
|
+
# Copyright © 2011-2018 Marvin Gülker et al.
|
6
|
+
#
|
7
|
+
# See AUTHORS for the full list of contributors.
|
8
8
|
#
|
9
9
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
10
10
|
# copy of this software and associated documentation files (the ‘Software’),
|
@@ -23,144 +23,288 @@
|
|
23
23
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
24
24
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
25
25
|
# THE SOFTWARE.
|
26
|
+
#++
|
26
27
|
|
27
28
|
require "pathname"
|
28
|
-
require "
|
29
|
-
require
|
30
|
-
require "
|
31
|
-
|
32
|
-
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#All strings you receive from any method defined in this module
|
37
|
-
#and the classes defined in it are encoded in BINARY, so you may
|
38
|
-
#have to call #force_encoding on them to tag them with the correct
|
39
|
-
#encoding (assuming you _know_ what their correct encoding should be).
|
40
|
-
#ruby-xz can’t handle this as compiled strings don’t come with encoding
|
41
|
-
#information.
|
29
|
+
require "fiddle"
|
30
|
+
require "fiddle/import"
|
31
|
+
require "stringio"
|
32
|
+
require "forwardable"
|
33
|
+
|
34
|
+
# The namespace and main module of this library. Each method of this
|
35
|
+
# module may raise exceptions of class XZ::LZMAError, which is not
|
36
|
+
# named in the methods' documentation anymore.
|
42
37
|
module XZ
|
43
|
-
#The version of this library.
|
44
|
-
VERSION = "0.2.1"
|
45
38
|
|
46
|
-
#Number of bytes read in one chunk.
|
39
|
+
# Number of bytes read in one chunk.
|
47
40
|
CHUNK_SIZE = 4096
|
48
41
|
|
49
42
|
class << self
|
50
43
|
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
#
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
# has an unsupported checksum type.
|
68
|
-
# [:concatenated] Decompress concatenated archives.
|
69
|
-
#[chunk] (Block argument) One piece of decompressed data.
|
70
|
-
#===Return value
|
71
|
-
#If a block was given, returns the number of bytes written. Otherwise,
|
72
|
-
#returns the decompressed data as a BINARY-encoded string.
|
73
|
-
#===Example
|
74
|
-
# data = File.open("archive.xz", "rb"){|f| f.read}
|
75
|
-
# io = StringIO.new(data)
|
76
|
-
# XZ.decompress_stream(io) #=> "I AM THE DATA"
|
77
|
-
# io.rewind
|
78
|
-
# str = ""
|
79
|
-
# XZ.decompress_stream(io, XZ::LibLZMA::UINT64_MAX, [:tell_no_check]){|c| str << c} #=> 13
|
80
|
-
# str #=> "I AM THE DATA"
|
81
|
-
#===Remarks
|
82
|
-
#The block form is *much* better on memory usage, because it doesn't have
|
83
|
-
#to load everything into RAM at once. If you don't know how big your
|
84
|
-
#data gets or if you want to decompress much data, use the block form. Of
|
85
|
-
#course you shouldn't store the data you read in RAM then as in the
|
86
|
-
#example above.
|
87
|
-
def decompress_stream(io, memory_limit = LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check], &block)
|
88
|
-
raise(ArgumentError, "Invalid memory limit set!") unless (0..LibLZMA::UINT64_MAX).include?(memory_limit)
|
89
|
-
flags.each do |flag|
|
90
|
-
raise(ArgumentError, "Unknown flag #{flag}!") unless [:tell_no_check, :tell_unsupported_check, :tell_any_check, :concatenated].include?(flag)
|
44
|
+
# Force ruby-xz to be silent about deprecations. Using this is
|
45
|
+
# discouraged so that you are aware of upcoming changes to the
|
46
|
+
# API. However, if your standard error stream is closed,
|
47
|
+
# outputting the deprecation notices might result in an exception,
|
48
|
+
# so this method allows you to suppress these notices. Ensure you
|
49
|
+
# read the HISTORY.rdoc file carefully instead.
|
50
|
+
def disable_deprecation_notices=(bool)
|
51
|
+
@disable_deprecation_notices = bool
|
52
|
+
end
|
53
|
+
|
54
|
+
# Output a deprecation notice.
|
55
|
+
def deprecate(msg) # :nodoc:
|
56
|
+
@disable_deprecation_notices ||= false
|
57
|
+
|
58
|
+
unless @disable_deprecation_notices
|
59
|
+
$stderr.puts("DEPRECATION NOTICE: #{msg}\n#{caller.drop(1).join("\n\t")}")
|
91
60
|
end
|
61
|
+
end
|
92
62
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
63
|
+
# call-seq:
|
64
|
+
# decompress_stream(io [, kw ] ) → a_string
|
65
|
+
# decompress_stream(io [, kw ] ){|chunk| ... } → an_integer
|
66
|
+
# decode_stream(io [, kw ] ) → a_string
|
67
|
+
# decode_stream(io [, kw ] ){|chunk| ... } → an_integer
|
68
|
+
#
|
69
|
+
# Decompresses a stream containing XZ-compressed data.
|
70
|
+
#
|
71
|
+
# === Parameters
|
72
|
+
# ==== Positional parameters
|
73
|
+
#
|
74
|
+
# [io]
|
75
|
+
# The IO to read from. It must be opened for reading in
|
76
|
+
# binary mode.
|
77
|
+
# [chunk (Block argument)]
|
78
|
+
# One piece of decompressed data. See Remarks section below
|
79
|
+
# for information about its encoding.
|
80
|
+
#
|
81
|
+
# ==== Keyword arguments
|
82
|
+
#
|
83
|
+
# [memory_limit (+UINT64_MAX+)]
|
84
|
+
# If not XZ::LibLZMA::UINT64_MAX, makes liblzma
|
85
|
+
# use no more memory than +memory_limit+ bytes.
|
86
|
+
#
|
87
|
+
# [flags (<tt>[:tell_unsupported_check]</tt>)]
|
88
|
+
# Additional flags
|
89
|
+
# passed to liblzma (an array). Possible flags are:
|
90
|
+
#
|
91
|
+
# [:tell_no_check]
|
92
|
+
# Spit out a warning if the archive has no
|
93
|
+
# integrity checksum.
|
94
|
+
# [:tell_unsupported_check]
|
95
|
+
# Spit out a warning if the archive
|
96
|
+
# has an unsupported checksum type.
|
97
|
+
# [:concatenated]
|
98
|
+
# Decompress concatenated archives.
|
99
|
+
# [external_encoding (Encoding.default_external)]
|
100
|
+
# Assume the decompressed data inside the compressed data
|
101
|
+
# has this encoding. See Remarks section.
|
102
|
+
# [internal_encoding (Encoding.default_internal)]
|
103
|
+
# Request transcoding of the decompressed data into this
|
104
|
+
# encoding if not nil. Note that Encoding.default_internal
|
105
|
+
# is nil by default. See Remarks section.
|
106
|
+
#
|
107
|
+
# === Return value
|
108
|
+
#
|
109
|
+
# If a block was given, returns the number of bytes
|
110
|
+
# written. Otherwise, returns the decompressed data as a
|
111
|
+
# string encoded as described in the Remarks section.
|
112
|
+
#
|
113
|
+
# === Raises
|
114
|
+
#
|
115
|
+
# [Encoding::InvalidByteSequenceError]
|
116
|
+
# 1. You requested an “internal encoding” conversion
|
117
|
+
# and the archive contains invalid byte sequences
|
118
|
+
# in the external encoding.
|
119
|
+
# 2. You requested an “internal encoding” conversion, used
|
120
|
+
# the block form of this method, and liblzma decided
|
121
|
+
# to cut the decompressed data into chunks in the middle of
|
122
|
+
# a multibyte character. See Remarks section for an
|
123
|
+
# explanation.
|
124
|
+
#
|
125
|
+
# === Example
|
126
|
+
#
|
127
|
+
# data = File.open("archive.xz", "rb"){|f| f.read}
|
128
|
+
# io = StringIO.new(data)
|
129
|
+
#
|
130
|
+
# XZ.decompress_stream(io) #=> "I AM THE DATA"
|
131
|
+
# io.rewind
|
132
|
+
#
|
133
|
+
# str = ""
|
134
|
+
# XZ.decompress_stream(io, memory_limit: XZ::LibLZMA::UINT64_MAX, flags: [:tell_no_check]){|c| str << c} #=> 13
|
135
|
+
# str #=> "I AM THE DATA"
|
136
|
+
#
|
137
|
+
# === Remarks
|
138
|
+
#
|
139
|
+
# The block form is *much* better on memory usage, because it
|
140
|
+
# doesn't have to load everything into RAM at once. If you don't
|
141
|
+
# know how big your data gets or if you want to decompress much
|
142
|
+
# data, use the block form. Of course you shouldn't store the data
|
143
|
+
# you read in RAM then as in the example above.
|
144
|
+
#
|
145
|
+
# This method honours Ruby's external and internal encoding concept.
|
146
|
+
# All documentation about this applies to this method, with the
|
147
|
+
# exception that the external encoding does not refer to the data
|
148
|
+
# on the hard disk (that's compressed XZ data, it's always binary),
|
149
|
+
# but to the data inside the XZ container, i.e. to the *decompressed*
|
150
|
+
# data. Any strings you receive from this method (regardless of
|
151
|
+
# whether via return value or via the +chunk+ block argument) will
|
152
|
+
# first be tagged with the external encoding. If you set an internal
|
153
|
+
# encoding (either via the +internal_encoding+ parameter or via
|
154
|
+
# Ruby's default internal encoding) that string will be transcoded
|
155
|
+
# from the external encoding to the internal encoding before you
|
156
|
+
# even see it; in that case, the return value or chunk block argument
|
157
|
+
# will be encoded in the internal encoding. Internal encoding is
|
158
|
+
# disabled in Ruby by default and the argument for this method also
|
159
|
+
# defaults to nil.
|
160
|
+
#
|
161
|
+
# Due to the external encoding being applied, it can happen that
|
162
|
+
# +chunk+ contains an incomplete multibyte character causing
|
163
|
+
# <tt>valid_encoding?</tt> to return false if called on +chunk+,
|
164
|
+
# because liblzma doesn't know about encodings. The rest of the
|
165
|
+
# character will be yielded to the block in the next iteration
|
166
|
+
# then as liblzma progresses with the decompression from the XZ
|
167
|
+
# format. In other words, be prepared that +chunk+ can contain
|
168
|
+
# incomplete multibyte chars.
|
169
|
+
#
|
170
|
+
# This can have nasty side effects if you requested an internal
|
171
|
+
# encoding automatic transcoding and used the block form. Since
|
172
|
+
# this method applies the internal encoding transcoding before the
|
173
|
+
# chunk is yielded to the block, String#encode gets the incomplete
|
174
|
+
# multibyte character. In that case, you will receive an
|
175
|
+
# Encoding::InvalidByteSequenceError exception even though your
|
176
|
+
# data is perfectly well-formed inside the XZ data. It's just
|
177
|
+
# that liblzma during decompression cut the chunks at an
|
178
|
+
# unfortunate place. To avoid this, do not request internal encoding
|
179
|
+
# conversion when using the block form, but instead transcode
|
180
|
+
# the data manually after you have decompressed the entire data.
|
181
|
+
def decompress_stream(io, memory_limit: LibLZMA::UINT64_MAX, flags: [:tell_unsupported_check], external_encoding: nil, internal_encoding: nil, &block)
|
182
|
+
raise(ArgumentError, "Invalid memory limit set!") unless memory_limit > 0 && memory_limit <= LibLZMA::UINT64_MAX
|
183
|
+
raise(ArgumentError, "external_encoding must be set if internal_encoding transcoding is requested") if internal_encoding && !external_encoding
|
184
|
+
|
185
|
+
# The ArgumentError above is only about the concrete arguments
|
186
|
+
# (to sync with Ruby's IO API), not about the implied internal
|
187
|
+
# encoding, which might still kick in (and does, see below).
|
188
|
+
external_encoding ||= Encoding.default_external
|
189
|
+
internal_encoding ||= Encoding.default_internal
|
190
|
+
|
191
|
+
# bit-or all flags
|
192
|
+
allflags = flags.inject(0) do |val, flag|
|
193
|
+
flag = LibLZMA::LZMA_DECODE_FLAGS[flag] || raise(ArgumentError, "Unknown flag #{flag}!")
|
194
|
+
val | flag
|
195
|
+
end
|
196
|
+
|
197
|
+
stream = LibLZMA::LZMAStream.malloc
|
198
|
+
LibLZMA.LZMA_STREAM_INIT(stream)
|
199
|
+
res = LibLZMA.lzma_stream_decoder(stream.to_ptr,
|
200
|
+
memory_limit,
|
201
|
+
allflags)
|
99
202
|
|
100
203
|
LZMAError.raise_if_necessary(res)
|
101
204
|
|
102
205
|
res = ""
|
103
206
|
res.encode!(Encoding::BINARY)
|
104
207
|
if block_given?
|
105
|
-
res = lzma_code(io, stream
|
208
|
+
res = lzma_code(io, stream) do |chunk|
|
209
|
+
chunk = chunk.dup # Do not write somewhere into the fiddle pointer while encoding (-> can segfault)
|
210
|
+
chunk.force_encoding(external_encoding) if external_encoding
|
211
|
+
chunk.encode!(internal_encoding) if internal_encoding
|
212
|
+
yield(chunk)
|
213
|
+
end
|
106
214
|
else
|
107
215
|
lzma_code(io, stream){|chunk| res << chunk}
|
216
|
+
res.force_encoding(external_encoding) if external_encoding
|
217
|
+
res.encode!(internal_encoding) if internal_encoding
|
108
218
|
end
|
109
219
|
|
110
|
-
LibLZMA.lzma_end(stream.
|
220
|
+
LibLZMA.lzma_end(stream.to_ptr)
|
111
221
|
|
112
|
-
block_given? ? stream
|
222
|
+
block_given? ? stream.total_out : res
|
113
223
|
end
|
114
224
|
alias decode_stream decompress_stream
|
115
225
|
|
116
|
-
#call-seq:
|
117
|
-
#
|
118
|
-
#
|
119
|
-
#
|
120
|
-
#
|
121
|
-
#
|
122
|
-
#Compresses a stream of data into XZ-compressed data.
|
123
|
-
|
124
|
-
#
|
125
|
-
#
|
126
|
-
#
|
127
|
-
#
|
128
|
-
#
|
129
|
-
#
|
130
|
-
#
|
131
|
-
#
|
132
|
-
#
|
133
|
-
#
|
134
|
-
#
|
135
|
-
#
|
136
|
-
#
|
137
|
-
#
|
138
|
-
#
|
139
|
-
|
140
|
-
#
|
141
|
-
#
|
142
|
-
|
143
|
-
#
|
144
|
-
#
|
145
|
-
#
|
146
|
-
#
|
147
|
-
#
|
148
|
-
#
|
149
|
-
#
|
150
|
-
|
151
|
-
#
|
152
|
-
#
|
153
|
-
#
|
154
|
-
#
|
155
|
-
#
|
156
|
-
|
157
|
-
|
226
|
+
# call-seq:
|
227
|
+
# compress_stream(io [, kw ] ) → a_string
|
228
|
+
# compress_stream(io [, kw ] ){|chunk| ... } → an_integer
|
229
|
+
# encode_stream(io [, kw ] ) → a_string
|
230
|
+
# encode_stream(io [, kw ] ){|chunk| ... } → an_integer
|
231
|
+
#
|
232
|
+
# Compresses a stream of data into XZ-compressed data.
|
233
|
+
#
|
234
|
+
# === Parameters
|
235
|
+
# ==== Positional arguments
|
236
|
+
#
|
237
|
+
# [io]
|
238
|
+
# The IO to read the data from. Must be opened for
|
239
|
+
# reading.
|
240
|
+
# [chunk (Block argument)]
|
241
|
+
# One piece of compressed data. This is always tagged
|
242
|
+
# as a BINARY string, since it's compressed binary data.
|
243
|
+
#
|
244
|
+
# ==== Keyword arguments
|
245
|
+
# All keyword arguments are optional.
|
246
|
+
#
|
247
|
+
# [level (6)]
|
248
|
+
# Compression strength. Higher values indicate a
|
249
|
+
# smaller result, but longer compression time. Maximum
|
250
|
+
# is 9.
|
251
|
+
#
|
252
|
+
# [check (:crc64)]
|
253
|
+
# The checksum algorithm to use for verifying
|
254
|
+
# the data inside the archive. Possible values are:
|
255
|
+
# * :none
|
256
|
+
# * :crc32
|
257
|
+
# * :crc64
|
258
|
+
# * :sha256
|
259
|
+
#
|
260
|
+
# [extreme (false)]
|
261
|
+
# Tries to get the last bit out of the
|
262
|
+
# compression. This may succeed, but you can end
|
263
|
+
# up with *very* long computation times.
|
264
|
+
#
|
265
|
+
# === Return value
|
266
|
+
#
|
267
|
+
# If a block was given, returns the number of bytes
|
268
|
+
# written. Otherwise, returns the compressed data as a
|
269
|
+
# BINARY-encoded string.
|
270
|
+
#
|
271
|
+
# === Example
|
272
|
+
# data = File.read("file.txt")
|
273
|
+
# i = StringIO.new(data)
|
274
|
+
# XZ.compress_stream(i) #=> Some binary blob
|
275
|
+
#
|
276
|
+
# i.rewind
|
277
|
+
# str = ""
|
278
|
+
#
|
279
|
+
# XZ.compress_stream(i, level: 4, check: :sha256) do |c|
|
280
|
+
# str << c
|
281
|
+
# end #=> 123
|
282
|
+
# str #=> Some binary blob
|
283
|
+
#
|
284
|
+
# === Remarks
|
285
|
+
#
|
286
|
+
# The block form is *much* better on memory usage, because it
|
287
|
+
# doesn't have to load everything into RAM at once. If you don't
|
288
|
+
# know how big your data gets or if you want to compress much
|
289
|
+
# data, use the block form. Of course you shouldn't store the data
|
290
|
+
# you read in RAM then as in the example above.
|
291
|
+
#
|
292
|
+
# For the +io+ object passed Ruby's normal external and internal
|
293
|
+
# encoding rules apply while it is read from by this method. These
|
294
|
+
# encodings are not changed on +io+ by this method. The data you
|
295
|
+
# receive in the block (+chunk+) above is binary data (compressed
|
296
|
+
# data) and as such encoded as BINARY.
|
297
|
+
def compress_stream(io, level: 6, check: :crc64, extreme: false, &block)
|
298
|
+
raise(ArgumentError, "Invalid compression level!") unless (0..9).include?(level)
|
158
299
|
raise(ArgumentError, "Invalid checksum specified!") unless [:none, :crc32, :crc64, :sha256].include?(check)
|
159
300
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
301
|
+
level |= LibLZMA::LZMA_PRESET_EXTREME if extreme
|
302
|
+
|
303
|
+
stream = LibLZMA::LZMAStream.malloc
|
304
|
+
LibLZMA::LZMA_STREAM_INIT(stream)
|
305
|
+
res = LibLZMA.lzma_easy_encoder(stream.to_ptr,
|
306
|
+
level,
|
307
|
+
LibLZMA.const_get(:"LZMA_CHECK_#{check.upcase}"))
|
164
308
|
|
165
309
|
LZMAError.raise_if_necessary(res)
|
166
310
|
|
@@ -172,90 +316,132 @@ module XZ
|
|
172
316
|
lzma_code(io, stream){|chunk| res << chunk}
|
173
317
|
end
|
174
318
|
|
175
|
-
LibLZMA.lzma_end(stream.
|
319
|
+
LibLZMA.lzma_end(stream.to_ptr)
|
176
320
|
|
177
|
-
block_given? ? stream
|
321
|
+
block_given? ? stream.total_out : res
|
178
322
|
end
|
179
323
|
alias encode_stream compress_stream
|
180
324
|
|
181
|
-
#Compresses +in_file+ and writes the result to +out_file+.
|
182
|
-
|
183
|
-
#
|
184
|
-
#
|
185
|
-
#
|
186
|
-
#
|
187
|
-
|
188
|
-
#The
|
189
|
-
|
190
|
-
#
|
191
|
-
#
|
192
|
-
|
193
|
-
#
|
194
|
-
#
|
195
|
-
|
325
|
+
# Compresses +in_file+ and writes the result to +out_file+.
|
326
|
+
#
|
327
|
+
# === Parameters
|
328
|
+
#
|
329
|
+
# [in_file]
|
330
|
+
# The path to the file to read from.
|
331
|
+
# [out_file]
|
332
|
+
# The path of the file to write to. If it exists, it will be
|
333
|
+
# overwritten.
|
334
|
+
#
|
335
|
+
# For the keyword parameters, see the ::compress_stream method.
|
336
|
+
#
|
337
|
+
# === Return value
|
338
|
+
#
|
339
|
+
# The number of bytes written, i.e. the size of the archive.
|
340
|
+
#
|
341
|
+
# === Example
|
342
|
+
#
|
343
|
+
# XZ.compress_file("myfile.txt", "myfile.txt.xz")
|
344
|
+
# XZ.compress_file("myarchive.tar", "myarchive.tar.xz")
|
345
|
+
#
|
346
|
+
# === Remarks
|
347
|
+
#
|
348
|
+
# This method is safe to use with big files, because files are not
|
349
|
+
# loaded into memory completely at once.
|
350
|
+
def compress_file(in_file, out_file, **args)
|
196
351
|
File.open(in_file, "rb") do |i_file|
|
197
352
|
File.open(out_file, "wb") do |o_file|
|
198
|
-
compress_stream(i_file,
|
353
|
+
compress_stream(i_file, **args) do |chunk|
|
199
354
|
o_file.write(chunk)
|
200
355
|
end
|
201
356
|
end
|
202
357
|
end
|
203
358
|
end
|
204
359
|
|
205
|
-
#Compresses arbitrary data using the XZ algorithm.
|
206
|
-
|
207
|
-
#
|
208
|
-
#
|
209
|
-
|
210
|
-
#
|
211
|
-
|
212
|
-
#
|
213
|
-
#
|
214
|
-
|
215
|
-
#
|
216
|
-
#
|
217
|
-
|
218
|
-
|
360
|
+
# Compresses arbitrary data using the XZ algorithm.
|
361
|
+
#
|
362
|
+
# === Parameters
|
363
|
+
#
|
364
|
+
# [str] The data to compress.
|
365
|
+
#
|
366
|
+
# For the keyword parameters, see the #compress_stream method.
|
367
|
+
#
|
368
|
+
# === Return value
|
369
|
+
#
|
370
|
+
# The compressed data as a BINARY-encoded string.
|
371
|
+
#
|
372
|
+
# === Example
|
373
|
+
#
|
374
|
+
# data = "I love Ruby"
|
375
|
+
# comp = XZ.compress(data) #=> binary blob
|
376
|
+
#
|
377
|
+
# === Remarks
|
378
|
+
#
|
379
|
+
# Don't use this method for big amounts of data--you may run out
|
380
|
+
# of memory. Use compress_file or compress_stream instead.
|
381
|
+
def compress(str, **args)
|
219
382
|
s = StringIO.new(str)
|
220
|
-
compress_stream(s,
|
383
|
+
compress_stream(s, **args)
|
221
384
|
end
|
222
385
|
|
223
|
-
#Decompresses data in XZ format.
|
224
|
-
|
225
|
-
#
|
226
|
-
#
|
227
|
-
|
228
|
-
#
|
229
|
-
|
230
|
-
#
|
231
|
-
#
|
232
|
-
|
233
|
-
#
|
234
|
-
#
|
235
|
-
|
236
|
-
|
386
|
+
# Decompresses data in XZ format.
|
387
|
+
#
|
388
|
+
# === Parameters
|
389
|
+
#
|
390
|
+
# [str] The data to decompress.
|
391
|
+
#
|
392
|
+
# For the keyword parameters, see the decompress_stream method.
|
393
|
+
#
|
394
|
+
# === Return value
|
395
|
+
#
|
396
|
+
# The decompressed data as a string; see Remarks for its encoding.
|
397
|
+
#
|
398
|
+
# === Example
|
399
|
+
#
|
400
|
+
# comp = File.open("data.xz", "rb"){|f| f.read}
|
401
|
+
# data = XZ.decompress(comp) #=> "I love Ruby"
|
402
|
+
#
|
403
|
+
# === Remarks
|
404
|
+
#
|
405
|
+
# Don't use this method for big amounts of data--you may run out
|
406
|
+
# of memory. Use decompress_file or decompress_stream instead.
|
407
|
+
#
|
408
|
+
# Read #decompress_stream's Remarks section for notes on the
|
409
|
+
# return value's encoding.
|
410
|
+
def decompress(str, **args)
|
237
411
|
s = StringIO.new(str)
|
238
|
-
decompress_stream(s,
|
412
|
+
decompress_stream(s, **args)
|
239
413
|
end
|
240
414
|
|
241
|
-
#Decompresses +in_file+ and writes the result to +out_file+.
|
242
|
-
|
243
|
-
#
|
244
|
-
#
|
245
|
-
#
|
246
|
-
#
|
247
|
-
|
248
|
-
#The
|
249
|
-
|
250
|
-
#
|
251
|
-
#
|
252
|
-
|
253
|
-
#
|
254
|
-
#
|
255
|
-
|
415
|
+
# Decompresses +in_file+ and writes the result to +out_file+.
|
416
|
+
#
|
417
|
+
# === Parameters
|
418
|
+
#
|
419
|
+
# [in_file]
|
420
|
+
# The path to the file to read from.
|
421
|
+
# [out_file]
|
422
|
+
# The path of the file to write to. If it exists, it will
|
423
|
+
# be overwritten.
|
424
|
+
#
|
425
|
+
# For the keyword parameters, see the decompress_stream method.
|
426
|
+
#
|
427
|
+
# === Return value
|
428
|
+
#
|
429
|
+
# The number of bytes written, i.e. the size of the uncompressed
|
430
|
+
# data.
|
431
|
+
#
|
432
|
+
# === Example
|
433
|
+
#
|
434
|
+
# XZ.decompress_file("myfile.txt.xz", "myfile.txt")
|
435
|
+
# XZ.decompress_file("myarchive.tar.xz", "myarchive.tar")
|
436
|
+
#
|
437
|
+
# === Remarks
|
438
|
+
#
|
439
|
+
# This method is safe to use with big files, because files are not
|
440
|
+
# loaded into memory completely at once.
|
441
|
+
def decompress_file(in_file, out_file, **args)
|
256
442
|
File.open(in_file, "rb") do |i_file|
|
257
443
|
File.open(out_file, "wb") do |o_file|
|
258
|
-
decompress_stream(i_file,
|
444
|
+
decompress_stream(i_file, internal_encoding: nil, external_encoding: Encoding::BINARY, **args) do |chunk|
|
259
445
|
o_file.write(chunk)
|
260
446
|
end
|
261
447
|
end
|
@@ -264,76 +450,68 @@ module XZ
|
|
264
450
|
|
265
451
|
private
|
266
452
|
|
267
|
-
#This method
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
str.bytes.to_a.size
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
|
-
#This method does the heavy work of (de-)compressing a stream. It takes
|
279
|
-
#an IO object to read data from (that means the IO must be opened
|
280
|
-
#for reading) and a XZ::LZMAStream object that is used to (de-)compress
|
281
|
-
#the data. Furthermore this method takes a block which gets passed
|
282
|
-
#the (de-)compressed data in chunks one at a time--this is needed to allow
|
283
|
-
#(de-)compressing of very large files that can't be loaded fully into
|
284
|
-
#memory.
|
453
|
+
# This method does the heavy work of (de-)compressing a stream. It
|
454
|
+
# takes an IO object to read data from (that means the IO must be
|
455
|
+
# opened for reading) and a XZ::LibLZMA::LZMAStream object that is used to
|
456
|
+
# (de-)compress the data. Furthermore this method takes a block
|
457
|
+
# which gets passed the (de-)compressed data in chunks one at a
|
458
|
+
# time--this is needed to allow (de-)compressing of very large
|
459
|
+
# files that can't be loaded fully into memory.
|
285
460
|
def lzma_code(io, stream)
|
286
|
-
input_buffer_p =
|
287
|
-
output_buffer_p =
|
461
|
+
input_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
|
462
|
+
output_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
|
288
463
|
|
289
464
|
while str = io.read(CHUNK_SIZE)
|
290
|
-
input_buffer_p.
|
291
|
-
|
292
|
-
#Set the data for compressing
|
293
|
-
stream
|
294
|
-
stream
|
295
|
-
|
296
|
-
#Now loop until we gathered all the data in
|
297
|
-
#amount of data, this may
|
298
|
-
#
|
299
|
-
#
|
300
|
-
#
|
301
|
-
#
|
302
|
-
#
|
303
|
-
#
|
465
|
+
input_buffer_p[0, str.bytesize] = str
|
466
|
+
|
467
|
+
# Set the data for compressing
|
468
|
+
stream.next_in = input_buffer_p
|
469
|
+
stream.avail_in = str.bytesize
|
470
|
+
|
471
|
+
# Now loop until we gathered all the data in
|
472
|
+
# stream.next_out. Depending on the amount of data, this may
|
473
|
+
# not fit into the buffer, meaning that we have to provide a
|
474
|
+
# pointer to a "new" buffer that liblzma can write into. Since
|
475
|
+
# liblzma already set stream.avail_in to 0 in the first
|
476
|
+
# iteration, the extra call to the lzma_code() function
|
477
|
+
# doesn't hurt (indeed the pipe_comp example from liblzma
|
478
|
+
# handles it this way too). Sometimes it happens that the
|
479
|
+
# compressed data is bigger than the original (notably when
|
480
|
+
# the amount of data to compress is small).
|
304
481
|
loop do
|
305
|
-
#Prepare for getting the compressed_data
|
306
|
-
stream
|
307
|
-
stream
|
482
|
+
# Prepare for getting the compressed_data
|
483
|
+
stream.next_out = output_buffer_p
|
484
|
+
stream.avail_out = CHUNK_SIZE
|
308
485
|
|
309
|
-
#Compress the data
|
486
|
+
# Compress the data
|
310
487
|
res = if io.eof?
|
311
|
-
LibLZMA.lzma_code(stream.
|
488
|
+
LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_FINISH)
|
312
489
|
else
|
313
|
-
LibLZMA.lzma_code(stream.
|
490
|
+
LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_RUN)
|
314
491
|
end
|
315
492
|
check_lzma_code_retval(res)
|
316
493
|
|
317
|
-
#Write the compressed data
|
318
|
-
|
494
|
+
# Write the compressed data
|
495
|
+
# Note: avail_out gives how much space is left after the new data
|
496
|
+
data = output_buffer_p[0, CHUNK_SIZE - stream.avail_out]
|
319
497
|
yield(data)
|
320
498
|
|
321
|
-
#If the buffer is completely filled, it's likely that there
|
322
|
-
#more data liblzma wants to hand to us. Start a new
|
323
|
-
#but don't provide new input data.
|
324
|
-
break unless stream
|
499
|
+
# If the buffer is completely filled, it's likely that there
|
500
|
+
# is more data liblzma wants to hand to us. Start a new
|
501
|
+
# iteration, but don't provide new input data.
|
502
|
+
break unless stream.avail_out == 0
|
325
503
|
end #loop
|
326
504
|
end #while
|
327
505
|
end #lzma_code
|
328
506
|
|
329
|
-
#Checks for errors and warnings that can be derived from the
|
330
|
-
#value of the lzma_code() function and shows them if
|
507
|
+
# Checks for errors and warnings that can be derived from the
|
508
|
+
# return value of the lzma_code() function and shows them if
|
509
|
+
# necessary.
|
331
510
|
def check_lzma_code_retval(code)
|
332
|
-
e = LibLZMA::LZMA_RET
|
333
511
|
case code
|
334
|
-
when
|
335
|
-
when
|
336
|
-
when
|
512
|
+
when LibLZMA::LZMA_NO_CHECK then warn("Couldn't verify archive integrity--archive has no integrity checksum.")
|
513
|
+
when LibLZMA::LZMA_UNSUPPORTED_CHECK then warn("Couldn't verify archive integrity--archive has an unsupported integrity checksum.")
|
514
|
+
when LibLZMA::LZMA_GET_CHECK then nil # This isn't useful. It indicates that the checksum type is now known.
|
337
515
|
else
|
338
516
|
LZMAError.raise_if_necessary(code)
|
339
517
|
end
|
@@ -343,6 +521,8 @@ module XZ
|
|
343
521
|
|
344
522
|
end
|
345
523
|
|
524
|
+
require_relative "xz/version"
|
525
|
+
require_relative "xz/fiddle_helper"
|
346
526
|
require_relative "xz/lib_lzma"
|
347
527
|
require_relative "xz/stream"
|
348
528
|
require_relative "xz/stream_writer"
|