ruby-xz 0.2.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/AUTHORS +7 -0
- data/HISTORY.rdoc +84 -7
- data/LICENSE +21 -0
- data/README.md +122 -0
- data/lib/xz/fiddle_helper.rb +91 -0
- data/lib/xz/lib_lzma.rb +134 -110
- data/lib/xz/stream.rb +431 -32
- data/lib/xz/stream_reader.rb +251 -224
- data/lib/xz/stream_writer.rb +208 -158
- data/lib/xz/version.rb +33 -0
- data/lib/xz.rb +412 -232
- metadata +49 -57
- data/COPYING +0 -26
- data/README.rdoc +0 -89
data/lib/xz/stream.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
#
|
2
|
+
#--
|
4
3
|
# Basic liblzma-bindings for Ruby.
|
5
4
|
#
|
6
|
-
# Copyright ©
|
5
|
+
# Copyright © 2011-2018 Marvin Gülker et al.
|
6
|
+
#
|
7
|
+
# See AUTHORS for the full list of contributors.
|
7
8
|
#
|
8
9
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
9
10
|
# copy of this software and associated documentation files (the ‘Software’),
|
@@ -22,45 +23,443 @@
|
|
22
23
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
23
24
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
24
25
|
# THE SOFTWARE.
|
26
|
+
#++
|
25
27
|
|
26
|
-
#The base class for XZ::StreamReader and XZ::StreamWriter.
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
28
|
+
# The base class for XZ::StreamReader and XZ::StreamWriter. This is
|
29
|
+
# an abstract class that is not meant to be used directly. You can,
|
30
|
+
# however, test against this class in <tt>kind_of?</tt> tests.
|
31
|
+
#
|
32
|
+
# XZ::StreamReader and XZ::StreamWriter are IO-like classes that allow
|
33
|
+
# you to access XZ-compressed data the same way you access an
|
34
|
+
# IO-object, easily allowing to fool other libraries that expect IO
|
35
|
+
# objects. The most noticeable example for this may be reading and
|
36
|
+
# writing XZ-compressed tarballs using the minitar
|
37
|
+
# RubyGem; see the README.md file for an example.
|
38
|
+
#
|
39
|
+
# Most of IO's methods are implemented in this class or one of the
|
40
|
+
# subclasses. The most notable exception is that it is not possible
|
41
|
+
# to seek in XZ archives (#seek and #pos= are not defined).
|
42
|
+
# Many methods that are not expressly documented in the RDoc
|
43
|
+
# still exist; this class uses Ruby's Forwardable module to forward
|
44
|
+
# them to the underlying IO object.
|
31
45
|
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
46
|
+
# Stream and its subclasses honour Ruby's external+internal encoding
|
47
|
+
# system just like Ruby's own IO does. All of what the Ruby docs say
|
48
|
+
# about external and internal encodings applies to this class with one
|
49
|
+
# important difference. The "external encoding" does not refer to the
|
50
|
+
# encoding of the file on the hard disk (this file is always a binary
|
51
|
+
# file as it's compressed data), but to the encoding of the
|
52
|
+
# decompressed data inside the compressed file.
|
38
53
|
#
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
54
|
+
# As with Ruby's IO class, instances of this class and its subclasses
|
55
|
+
# default their external encoding to Encoding.default_external and
|
56
|
+
# their internal encoding to Encoding.default_internal. You can use
|
57
|
+
# #set_encoding or pass appropriate arguments to the +new+ method to
|
58
|
+
# change these encodings per-instance.
|
44
59
|
class XZ::Stream
|
45
|
-
|
60
|
+
extend Forwardable

# Everything listed here is handed to the wrapped IO object unchanged.
#
# TODO: liblzma might have its own flush method that should be used
# instead of delegating +flush+; likewise liblzma's own syncing
# functionality might be the better choice for +sync+ and <tt>sync=</tt>.
#
# +stat+ is deliberately not in the list: if it is available the minitar
# gem thinks it's a File and wants to seek it.
def_delegators :@delegate_io, *%i[
  autoclose= autoclose? binmode binmode? close_on_exec= close_on_exec?
  fcntl fdatasync fileno to_i flush fsync ioctl isatty pid
  sync sync= tty?
]
|
81
|
+
|
82
|
+
# Like IO#lineno and IO#lineno=.
|
83
|
+
attr_accessor :lineno
|
84
|
+
|
85
|
+
# Returns the encoding used inside the compressed data stream.
|
86
|
+
# Like IO#external_encoding.
|
87
|
+
attr_reader :external_encoding
|
88
|
+
|
89
|
+
# When compressed data is read, the decompressed data is transcoded
|
90
|
+
# from the external_encoding to this encoding. If this encoding is
|
91
|
+
# nil, no transcoding happens.
|
92
|
+
attr_reader :internal_encoding
|
93
|
+
|
94
|
+
# Private API only for use by subclasses.
|
95
|
+
def initialize(delegate_io) # :nodoc:
  @delegate_io = delegate_io

  # liblzma's stream structure, prepared through LZMA_STREAM_INIT.
  # Subclasses are expected to configure it further.
  @lzma_stream = XZ::LibLZMA::LZMAStream.malloc
  XZ::LibLZMA::LZMA_STREAM_INIT(@lzma_stream)

  # Bookkeeping for the IO-like interface.
  @finished = false
  @lineno = @pos = 0

  # Encoding defaults mirror the process-wide defaults.
  @external_encoding, @internal_encoding =
    Encoding.default_external, Encoding.default_internal
  @transcode_options = {}

  # Buffers of XZ::CHUNK_SIZE bytes each, used by #lzma_code.
  @input_buffer_p = Fiddle::Pointer.malloc(XZ::CHUNK_SIZE)
  @output_buffer_p = Fiddle::Pointer.malloc(XZ::CHUNK_SIZE)
end
|
109
|
+
|
110
|
+
# Pass the given +str+ into liblzma's lzma_code() function.
|
111
|
+
# +action+ is either LibLZMA::LZMA_RUN (still working) or
|
112
|
+
# LibLZMA::LZMA_FINISH (this is the last piece).
|
113
|
+
# +str+ is fed to liblzma in slices of at most XZ::CHUNK_SIZE bytes and
# every piece of output is yielded to the block. +str+ itself is never
# modified, so frozen strings are accepted as well.
def lzma_code(str, action) # :nodoc:
  pos = 0

  # Do not use >= here, that conflicts with #lzma_finish: the body has
  # to run once even when +str+ is empty.
  until pos > str.bytesize
    # byteslice addresses raw bytes regardless of the encoding of +str+,
    # so there is no need to force the caller's string to BINARY (and
    # back again afterwards).
    chunk = str.byteslice(pos, XZ::CHUNK_SIZE)
    @input_buffer_p[0, chunk.bytesize] = chunk
    pos += XZ::CHUNK_SIZE

    @lzma_stream.next_in = @input_buffer_p
    @lzma_stream.avail_in = chunk.bytesize

    loop do
      @lzma_stream.next_out = @output_buffer_p
      @lzma_stream.avail_out = XZ::CHUNK_SIZE
      res = XZ::LibLZMA.lzma_code(@lzma_stream.to_ptr, action)
      XZ.send :check_lzma_code_retval, res # call package-private method

      yield(@output_buffer_p[0, XZ::CHUNK_SIZE - @lzma_stream.avail_out])

      # Keep going for as long as the output buffer comes back
      # completely filled; stop once some of it is left unused.
      break unless @lzma_stream.avail_out == 0
    end
  end
end
|
143
|
+
|
144
|
+
# Partial implementation of +rewind+ abstracting common operations.
|
145
|
+
# The subclasses implement the rest.
|
146
|
+
def rewind # :nodoc:
  # Order matters here. The current lzma stream is released and the
  # underlying IO is rewound *before* a new lzma stream is allocated:
  # should the delegate's rewind raise (because the underlying IO is
  # not rewindable), no freshly allocated, never-freed lzma stream is
  # left behind.
  finish
  @delegate_io.rewind

  # Back to the state of a stream nothing was read from/written to.
  @pos = 0
  @lineno = 0
  @finished = false

  # New lzma stream; configuring it is the subclasses' job.
  @lzma_stream = XZ::LibLZMA::LZMAStream.malloc
  XZ::LibLZMA::LZMA_STREAM_INIT(@lzma_stream)

  0 # same return value as IO#rewind
end
|
165
|
+
|
166
|
+
# You can mostly treat this as if it were an IO object.
|
167
|
+
# At least for subclasses. This class itself is abstract,
|
168
|
+
# you shouldn't be using it directly at all.
|
169
|
+
#
|
170
|
+
# Returns the receiver.
|
171
|
+
def to_io
  # The stream stands in for an IO itself, so there is nothing to convert.
  return self
end
|
174
|
+
|
175
|
+
# Overridden in StreamReader to be like IO#eof?.
|
176
|
+
# This abstract implementation only raises IOError.
|
177
|
+
def eof?
  # Abstract variant: there is nothing to read from here.
  raise IOError, "Stream not opened for reading"
end
|
180
|
+
|
181
|
+
# Alias for #eof?
|
182
|
+
def eof
  # Goes through #eof? at call time, so an overridden #eof? is honoured.
  return eof?
end
|
185
|
+
|
186
|
+
# True if the delegate IO has been closed.
|
187
|
+
def closed?
  # The stream has no open/closed state of its own; ask the wrapped IO.
  io = @delegate_io
  io.closed?
end
|
190
|
+
|
191
|
+
# True if liblzma's internal memory has been freed. For writer
|
192
|
+
# instances, receiving true from this method also means that all
|
193
|
+
# of liblzma's compressed data has been flushed to the underlying
|
194
|
+
# IO object.
|
195
|
+
def finished?
  # Flag maintained by #initialize, #rewind and #finish.
  return @finished
end
|
198
|
+
|
199
|
+
# Free internal liblzma memory. This needs to be called before
|
200
|
+
# you leave this object for the GC. If you used a block-form
|
201
|
+
# initializer, this is done automatically for you.
|
202
|
+
#
|
203
|
+
# Subsequent calls to #read or #write will cause an IOError.
|
204
|
+
#
|
205
|
+
# Returns the underlying IO object. This allows you to retrieve
|
206
|
+
# the File instance that was automatically created when using
|
207
|
+
# the +open+ method's block form.
|
208
|
+
def finish
  # Already finished: nothing is left to free, but the underlying IO is
  # still handed back so that every call returns the same thing.
  return @delegate_io if @finished

  # Clean up the lzma_stream structure's internal memory.
  # This would belong into a destructor if Ruby had that.
  XZ::LibLZMA.lzma_end(@lzma_stream)
  @finished = true

  @delegate_io
end
|
218
|
+
|
219
|
+
|
220
|
+
# If not done yet, call #finish. Then close the delegate IO.
|
221
|
+
# The latter action is going to cause the delegate IO to
|
222
|
+
# flush its buffer. After this method returns, it is guaranteed
|
223
|
+
# that all pending data has been flushed to the OS' kernel.
|
224
|
+
def close
  # Release liblzma's resources first (once only) ...
  finish unless @finished

  # ... then close the wrapped IO, unless somebody else already did.
  io = @delegate_io
  io.close unless io.closed?

  nil
end
|
229
|
+
|
230
|
+
# Always raises IOError, because XZ streams can never be duplex.
|
231
|
+
def close_read
  # There is only ever one direction, hence nothing to half-close.
  raise IOError, "Not a duplex I/O stream"
end
|
234
|
+
|
235
|
+
# Always raises IOError, because XZ streams can never be duplex.
|
236
|
+
def close_write
  # There is only ever one direction, hence nothing to half-close.
  raise IOError, "Not a duplex I/O stream"
end
|
239
|
+
|
240
|
+
# Overridden in StreamReader to be like IO#read.
|
241
|
+
# This abstract implementation only raises IOError.
|
242
|
+
def read(*args)
  # Arguments are accepted (and ignored) so the signature lines up with
  # the overriding implementation.
  raise IOError, "Stream not opened for reading"
end
|
245
|
+
|
246
|
+
# Overridden in StreamWriter to be like IO#write.
|
247
|
+
# This abstract implementation only raises IOError.
|
248
|
+
def write(*args)
  # Arguments are accepted (and ignored) so the signature lines up with
  # the overriding implementation.
  raise IOError, "Stream not opened for writing"
end
|
251
|
+
|
252
|
+
# Returns the position in the *decompressed* data (regardless of
|
253
|
+
# whether this is a reader or a writer instance).
|
254
|
+
def pos
  # Counter kept in @pos; it starts at 0 and is reset by #rewind.
  return @pos
end
alias tell pos
|
258
|
+
|
259
|
+
# Like IO#set_encoding.
|
260
|
+
def set_encoding(*args)
  argc = args.count
  unless (1..3).cover?(argc)
    raise ArgumentError, "Wrong number of arguments: Expected 1-3, got #{argc}"
  end

  # "external:internal" given as a single string: split it at the
  # first colon and start over with both names as separate arguments.
  spec = args[0].respond_to?(:to_str) ? args[0].to_str : nil
  if spec && spec.include?(":")
    external_name, _colon, internal_name = spec.partition(":")
    return set_encoding(external_name, internal_name, *args[1..-1])
  end

  # A trailing Hash carries the transcoding options.
  @transcode_options = args.pop if args.last.kind_of?(Hash)

  # What is left is [external] or [external, internal]; each entry is
  # either an Encoding already or something Encoding.find understands.
  external, internal = args
  @external_encoding = external.kind_of?(Encoding) ? external : Encoding.find(external)
  @internal_encoding =
    if !internal
      Encoding.default_internal # Encoding.default_internal defaults to nil
    elsif internal.kind_of?(Encoding)
      internal
    else
      Encoding.find(internal)
    end

  self
end
|
280
|
+
|
281
|
+
# Do not define #pos= and #seek, not even to throw NotImplementedError.
|
282
|
+
# Reason: The minitar gem thinks it can use these methods then and provokes
|
283
|
+
# the NotImplementedError exception.
|
284
|
+
|
285
|
+
# Like IO#<<.
|
286
|
+
def <<(obj)
  write(obj.to_s)

  # IO#<< answers the receiver; that is what makes chains such as
  # <tt>stream << "a" << "b"</tt> work.
  self
end
|
289
|
+
|
290
|
+
# Like IO#advise. No-op, because not meaningful on compressed data.
|
291
|
+
# Accepts the same arguments as IO#advise (and, as before, none at all);
# all of them are ignored.
def advise(_advice = nil, _offset = 0, _len = 0)
  nil
end
|
294
|
+
|
295
|
+
# Like IO#getbyte. Note this method isn't exactly performant,
|
296
|
+
# because it actually reads compressed data as a string and then
|
297
|
+
# needs to figure out the bytes from that again.
|
298
|
+
def getbyte
  # nil signals the end of the stream; otherwise read a one-byte
  # string and take its numeric value.
  eof? ? nil : read(1).getbyte(0)
end
|
302
|
+
|
303
|
+
# Like IO#readbyte.
|
304
|
+
def readbyte
  byte = getbyte
  # Unlike #getbyte, running off the end is an error here.
  raise EOFError, "End of stream reached" unless byte

  byte
end
|
307
|
+
|
308
|
+
# Like IO#getc.
|
309
|
+
# Returns nil once the end of the stream has been reached.
def getc
  # Without this guard a #read that answers nil at the end of the
  # stream (as IO#read does) would make the append below raise a
  # TypeError, and #readchar / #each_char could never see the end.
  return nil if eof?

  str = String.new

  # Read byte-by-byte until a valid character in the external
  # encoding was built. Appending happens in BINARY so that a
  # half-finished multibyte sequence cannot get in the way.
  loop do
    str.force_encoding(Encoding::BINARY)
    str << read(1)
    str.force_encoding(@external_encoding)

    break if str.valid_encoding? || eof?
  end

  # Transcode to internal encoding if one was requested
  @internal_encoding ? str.encode(@internal_encoding) : str
end
|
329
|
+
|
330
|
+
# Like IO#readchar.
|
331
|
+
def readchar
  char = getc
  # Unlike #getc, running off the end is an error here.
  raise EOFError, "End of stream reached" unless char

  char
end
|
334
|
+
|
335
|
+
# Like IO#gets.
|
336
|
+
# +separator+ may consist of several characters (e.g. "\r\n"). A nil or
# empty separator reads up to the end of the stream (or up to +limit+
# characters, if given).
def gets(separator = $/, limit = nil)
  return nil if eof?
  @lineno += 1

  # Mirror IO#gets' weird call-seq
  if separator.respond_to?(:to_int)
    limit = separator.to_int
    separator = $/
  end

  sep = separator.respond_to?(:to_str) ? separator.to_str : nil
  sep_len = sep ? sep.length : 0

  buf = String.new
  buf.force_encoding(target_encoding)
  until eof? || (limit && buf.length >= limit)
    buf << getc
    # Compare the whole tail of the buffer with the separator, not just
    # the last character; otherwise a multi-character separator could
    # never match. The slice is nil while the buffer is still shorter
    # than the separator, which simply compares unequal.
    return buf if sep_len > 0 && buf[-sep_len, sep_len] == sep
  end

  buf
end
|
355
|
+
|
356
|
+
# Like IO#readline.
|
357
|
+
def readline(*args)
  line = gets(*args)
  # Unlike #gets, running off the end is an error here.
  raise EOFError, "End of stream reached" unless line

  line
end
|
360
|
+
|
361
|
+
# Like IO#each.
|
362
|
+
# +args+ are passed on to #gets for every line. Without a block an
# Enumerator is returned that remembers +args+.
def each(*args)
  # The arguments have to be handed to the enumerator too; otherwise
  # a custom separator or limit is lost in the block-less form.
  return enum_for(__method__, *args) unless block_given?

  while line = gets(*args)
    yield(line)
  end
end
alias each_line each
|
370
|
+
|
371
|
+
# Like IO#each_byte.
|
372
|
+
def each_byte
  return enum_for(__method__) unless block_given?

  # Keep pulling bytes until #getbyte has nothing left to give.
  byte = getbyte
  while byte
    yield byte
    byte = getbyte
  end
end
|
379
|
+
|
380
|
+
# Like IO#each_char.
|
381
|
+
def each_char
  return enum_for(__method__) unless block_given?

  # Keep pulling characters until #getc has nothing left to give.
  char = getc
  while char
    yield char
    char = getc
  end
end
|
388
|
+
|
389
|
+
# Like IO#each_codepoint.
|
390
|
+
def each_codepoint
  return enum_for(__method__) unless block_given?

  # Built on top of #each_char: every character is turned into its
  # codepoint before it is passed on.
  each_char do |char|
    yield char.ord
  end
end
|
395
|
+
|
396
|
+
# Like IO#printf.
|
397
|
+
def printf(*args)
  # Format first, then hand the finished string to #write.
  formatted = format(*args)
  write(formatted)
  nil
end
|
401
|
+
|
402
|
+
# Like IO#putc.
|
403
|
+
def putc(obj)
  # Anything with #chr goes first (integers, but strings as well) ...
  return write(obj.chr) if obj.respond_to?(:chr)
  # ... then string-like objects are written as they are.
  return write(obj.to_str) if obj.respond_to?(:to_str)

  raise TypeError, "Can only #putc strings and numbers"
end
|
412
|
+
|
413
|
+
# Like IO#puts.
def puts(*objs)
  # No arguments at all: just a newline.
  if objs.empty?
    write("\n")
    return nil
  end

  objs.each do |obj|
    # Array-likes are written element by element.
    if obj.respond_to?(:to_ary)
      puts(*obj.to_ary)
      next
    end

    # Only add a newline when there is none yet; don't squeeze
    # multiple subsequent trailing newlines in `obj'.
    line = obj.to_s
    newline = "\n".encode(line.encoding)
    line += newline unless line.end_with?(newline)
    write(line)
  end

  nil
end
|
434
|
+
|
435
|
+
# Like IO#print.
|
436
|
+
# The output field separator <tt>$,</tt> (if set) is written between
# the objects, the output record separator <tt>$\\</tt> (if set) once
# at the very end.
def print(*objs)
  if objs.empty?
    # NOTE(review): $_ is local to each method frame, so it is not the
    # caller's last-read line here -- confirm whether #write copes
    # with nil.
    write($_)
  else
    objs.each_with_index do |obj, index|
      # Separator goes *between* objects only, never after the last one.
      write($,) if $, && index > 0
      write(obj.to_s)
    end
  end

  write($\) if $\
  nil
end
|
46
449
|
|
47
|
-
#
|
48
|
-
#
|
49
|
-
def
|
50
|
-
|
51
|
-
@lzma_stream = XZ::LZMAStream.new
|
450
|
+
# It is not possible to reopen an lzma stream, hence this
|
451
|
+
# method always raises NotImplementedError.
|
452
|
+
def reopen(*args)
  # Whatever is passed in, the answer is always the same.
  raise NotImplementedError, "Can't reopen an lzma stream"
end
|
53
455
|
|
54
456
|
private
|
55
457
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
#I benchmarked it, and it is as twice as fast.
|
60
|
-
if str.respond_to? :force_encoding
|
61
|
-
str.dup.force_encoding(Encoding::BINARY).size
|
458
|
+
# The encoding handed-out strings end up in: the internal encoding if
# one is set, the external encoding otherwise.
def target_encoding
  @internal_encoding || @external_encoding
end
|
66
465
|
|