ruby-xz 0.2.1 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/AUTHORS +7 -0
- data/HISTORY.rdoc +84 -7
- data/LICENSE +21 -0
- data/README.md +122 -0
- data/lib/xz/fiddle_helper.rb +91 -0
- data/lib/xz/lib_lzma.rb +134 -110
- data/lib/xz/stream.rb +431 -32
- data/lib/xz/stream_reader.rb +251 -224
- data/lib/xz/stream_writer.rb +208 -158
- data/lib/xz/version.rb +33 -0
- data/lib/xz.rb +412 -232
- metadata +49 -57
- data/COPYING +0 -26
- data/README.rdoc +0 -89
data/lib/xz/stream.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
#
|
2
|
+
#--
|
4
3
|
# Basic liblzma-bindings for Ruby.
|
5
4
|
#
|
6
|
-
# Copyright ©
|
5
|
+
# Copyright © 2011-2018 Marvin Gülker et al.
|
6
|
+
#
|
7
|
+
# See AUTHORS for the full list of contributors.
|
7
8
|
#
|
8
9
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
9
10
|
# copy of this software and associated documentation files (the ‘Software’),
|
@@ -22,45 +23,443 @@
|
|
22
23
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
23
24
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
24
25
|
# THE SOFTWARE.
|
26
|
+
#++
|
25
27
|
|
26
|
-
#The base class for XZ::StreamReader and XZ::StreamWriter.
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
28
|
+
# The base class for XZ::StreamReader and XZ::StreamWriter. This is
|
29
|
+
# an abstract class that is not meant to be used directly. You can,
|
30
|
+
# however, test against this class in <tt>kind_of?</tt> tests.
|
31
|
+
#
|
32
|
+
# XZ::StreamReader and XZ::StreamWriter are IO-like classes that allow
|
33
|
+
# you to access XZ-compressed data the same way you access an
|
34
|
+
# IO-object, easily allowing you to fool other libraries that expect IO
|
35
|
+
# objects. The most noticeable example for this may be reading and
|
36
|
+
# writing XZ-compressed tarballs using the minitar
|
37
|
+
# RubyGem; see the README.md file for an example.
|
38
|
+
#
|
39
|
+
# Most of IO's methods are implemented in this class or one of the
|
40
|
+
# subclasses. The most notable exception is that it is not possible
|
41
|
+
# to seek in XZ archives (#seek and #pos= are not defined).
|
42
|
+
# Many methods that are not expressly documented in the RDoc
|
43
|
+
# still exist; this class uses Ruby's Forwardable module to forward
|
44
|
+
# them to the underlying IO object.
|
31
45
|
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
46
|
+
# Stream and its subclasses honour Ruby's external+internal encoding
|
47
|
+
# system just like Ruby's own IO does. All of what the Ruby docs say
|
48
|
+
# about external and internal encodings applies to this class with one
|
49
|
+
# important difference. The "external encoding" does not refer to the
|
50
|
+
# encoding of the file on the hard disk (this file is always a binary
|
51
|
+
# file as it's compressed data), but to the encoding of the
|
52
|
+
# decompressed data inside the compressed file.
|
38
53
|
#
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
54
|
+
# As with Ruby's IO class, instances of this class and its subclasses
|
55
|
+
# default their external encoding to Encoding.default_external and
|
56
|
+
# their internal encoding to Encoding.default_internal. You can use
|
57
|
+
# #set_encoding or pass appropriate arguments to the +new+ method to
|
58
|
+
# change these encodings per-instance.
|
44
59
|
class XZ::Stream
|
45
|
-
|
60
|
+
extend Forwardable
|
61
|
+
|
62
|
+
def_delegator :@delegate_io, :"autoclose="
|
63
|
+
def_delegator :@delegate_io, :"autoclose?"
|
64
|
+
def_delegator :@delegate_io, :binmode
|
65
|
+
def_delegator :@delegate_io, :"binmode?"
|
66
|
+
def_delegator :@delegate_io, :"close_on_exec="
|
67
|
+
def_delegator :@delegate_io, :"close_on_exec?"
|
68
|
+
def_delegator :@delegate_io, :fcntl
|
69
|
+
def_delegator :@delegate_io, :fdatasync
|
70
|
+
def_delegator :@delegate_io, :fileno
|
71
|
+
def_delegator :@delegate_io, :to_i
|
72
|
+
def_delegator :@delegate_io, :flush # TODO: liblzma might have its own flush method that should be used
|
73
|
+
def_delegator :@delegate_io, :fsync
|
74
|
+
def_delegator :@delegate_io, :ioctl
|
75
|
+
def_delegator :@delegate_io, :isatty
|
76
|
+
def_delegator :@delegate_io, :pid
|
77
|
+
#def_delegator :@delegate_io, :stat # If this is available the minitar gem thinks it's a File and wants to seek it O_o
|
78
|
+
def_delegator :@delegate_io, :sync # TODO: use liblzma's own syncing functionality?
|
79
|
+
def_delegator :@delegate_io, :"sync=" # TODO: use liblzma's own syncing functionality?
|
80
|
+
def_delegator :@delegate_io, :"tty?"
|
81
|
+
|
82
|
+
# Like IO#lineno and IO#lineno=.
|
83
|
+
attr_accessor :lineno
|
84
|
+
|
85
|
+
# Returns the encoding used inside the compressed data stream.
|
86
|
+
# Like IO#external_encoding.
|
87
|
+
attr_reader :external_encoding
|
88
|
+
|
89
|
+
# When compressed data is read, the decompressed data is transcoded
|
90
|
+
# from the external_encoding to this encoding. If this encoding is
|
91
|
+
# nil, no transcoding happens.
|
92
|
+
attr_reader :internal_encoding
|
93
|
+
|
94
|
+
# Private API only for use by subclasses.
|
95
|
+
def initialize(delegate_io) # :nodoc:
|
96
|
+
@delegate_io = delegate_io
|
97
|
+
@lzma_stream = XZ::LibLZMA::LZMAStream.malloc
|
98
|
+
XZ::LibLZMA::LZMA_STREAM_INIT(@lzma_stream)
|
99
|
+
|
100
|
+
@finished = false
|
101
|
+
@lineno = 0
|
102
|
+
@pos = 0
|
103
|
+
@external_encoding = Encoding.default_external
|
104
|
+
@internal_encoding = Encoding.default_internal
|
105
|
+
@transcode_options = {}
|
106
|
+
@input_buffer_p = Fiddle::Pointer.malloc(XZ::CHUNK_SIZE)
|
107
|
+
@output_buffer_p = Fiddle::Pointer.malloc(XZ::CHUNK_SIZE)
|
108
|
+
end
|
109
|
+
|
110
|
+
# Pass the given +str+ into liblzma's lzma_code() function.
|
111
|
+
# +action+ is either LibLZMA::LZMA_RUN (still working) or
|
112
|
+
# LibLZMA::LZMA_FINISH (this is the last piece).
|
113
|
+
def lzma_code(str, action) # :nodoc:
|
114
|
+
previous_encoding = str.encoding
|
115
|
+
str.force_encoding(Encoding::BINARY) # Need to operate on bytes now
|
116
|
+
|
117
|
+
begin
|
118
|
+
pos = 0
|
119
|
+
until pos > str.bytesize # Do not use >=, that conflicts with #lzma_finish
|
120
|
+
substr = str[pos, XZ::CHUNK_SIZE]
|
121
|
+
@input_buffer_p[0, substr.bytesize] = substr
|
122
|
+
pos += XZ::CHUNK_SIZE
|
123
|
+
|
124
|
+
@lzma_stream.next_in = @input_buffer_p
|
125
|
+
@lzma_stream.avail_in = substr.bytesize
|
126
|
+
|
127
|
+
loop do
|
128
|
+
@lzma_stream.next_out = @output_buffer_p
|
129
|
+
@lzma_stream.avail_out = XZ::CHUNK_SIZE
|
130
|
+
res = XZ::LibLZMA.lzma_code(@lzma_stream.to_ptr, action)
|
131
|
+
XZ.send :check_lzma_code_retval, res # call package-private method
|
132
|
+
|
133
|
+
data = @output_buffer_p[0, XZ::CHUNK_SIZE - @lzma_stream.avail_out]
|
134
|
+
yield(data)
|
135
|
+
|
136
|
+
break unless @lzma_stream.avail_out == 0
|
137
|
+
end
|
138
|
+
end
|
139
|
+
ensure
|
140
|
+
str.force_encoding(previous_encoding)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
# Partial implementation of +rewind+ abstracting common operations.
|
145
|
+
# The subclasses implement the rest.
|
146
|
+
def rewind # :nodoc:
|
147
|
+
# Free the current lzma stream and rewind the underlying IO.
|
148
|
+
# It is required to call #rewind before allocating a new lzma
|
149
|
+
# stream, because if #rewind raises an exception (because the
|
150
|
+
# underlying IO is not rewindable), a memory leak would occur
|
151
|
+
# with regard to an allocated-but-never-freed lzma stream.
|
152
|
+
finish
|
153
|
+
@delegate_io.rewind
|
154
|
+
|
155
|
+
# Reset internal state
|
156
|
+
@pos = @lineno = 0
|
157
|
+
@finished = false
|
158
|
+
|
159
|
+
# Allocate a new lzma stream (subclasses will configure it).
|
160
|
+
@lzma_stream = XZ::LibLZMA::LZMAStream.malloc
|
161
|
+
XZ::LibLZMA::LZMA_STREAM_INIT(@lzma_stream)
|
162
|
+
|
163
|
+
0 # Mimic IO#rewind's return value
|
164
|
+
end
|
165
|
+
|
166
|
+
# You can mostly treat this as if it were an IO object.
|
167
|
+
# At least for subclasses. This class itself is abstract,
|
168
|
+
# you shouldn't be using it directly at all.
|
169
|
+
#
|
170
|
+
# Returns the receiver.
|
171
|
+
def to_io
|
172
|
+
self
|
173
|
+
end
|
174
|
+
|
175
|
+
# Overridden in StreamReader to be like IO#eof?.
|
176
|
+
# This abstract implementation only raises IOError.
|
177
|
+
def eof?
|
178
|
+
raise(IOError, "Stream not opened for reading")
|
179
|
+
end
|
180
|
+
|
181
|
+
# Alias for #eof?
|
182
|
+
def eof
|
183
|
+
eof?
|
184
|
+
end
|
185
|
+
|
186
|
+
# True if the delegate IO has been closed.
|
187
|
+
def closed?
|
188
|
+
@delegate_io.closed?
|
189
|
+
end
|
190
|
+
|
191
|
+
# True if liblzma's internal memory has been freed. For writer
|
192
|
+
# instances, receiving true from this method also means that all
|
193
|
+
# of liblzma's compressed data has been flushed to the underlying
|
194
|
+
# IO object.
|
195
|
+
def finished?
|
196
|
+
@finished
|
197
|
+
end
|
198
|
+
|
199
|
+
# Free internal liblzma memory. This needs to be called before
|
200
|
+
# you leave this object for the GC. If you used a block-form
|
201
|
+
# initializer, this is done automatically for you.
|
202
|
+
#
|
203
|
+
# Subsequent calls to #read or #write will cause an IOError.
|
204
|
+
#
|
205
|
+
# Returns the underlying IO object. This allows you to retrieve
|
206
|
+
# the File instance that was automatically created when using
|
207
|
+
# the +open+ method's block form.
|
208
|
+
def finish
|
209
|
+
return if @finished
|
210
|
+
|
211
|
+
# Clean up the lzma_stream structure's internal memory.
|
212
|
+
# This would belong into a destructor if Ruby had that.
|
213
|
+
XZ::LibLZMA.lzma_end(@lzma_stream)
|
214
|
+
@finished = true
|
215
|
+
|
216
|
+
@delegate_io
|
217
|
+
end
|
218
|
+
|
219
|
+
|
220
|
+
# If not done yet, call #finish. Then close the delegate IO.
|
221
|
+
# The latter action is going to cause the delegate IO to
|
222
|
+
# flush its buffer. After this method returns, it is guaranteed
|
223
|
+
# that all pending data has been flushed to the OS' kernel.
|
224
|
+
def close
|
225
|
+
finish unless @finished
|
226
|
+
@delegate_io.close unless @delegate_io.closed?
|
227
|
+
nil
|
228
|
+
end
|
229
|
+
|
230
|
+
# Always raises IOError, because XZ streams can never be duplex.
|
231
|
+
def close_read
|
232
|
+
raise(IOError, "Not a duplex I/O stream")
|
233
|
+
end
|
234
|
+
|
235
|
+
# Always raises IOError, because XZ streams can never be duplex.
|
236
|
+
def close_write
|
237
|
+
raise(IOError, "Not a duplex I/O stream")
|
238
|
+
end
|
239
|
+
|
240
|
+
# Overridden in StreamReader to be like IO#read.
|
241
|
+
# This abstract implementation only raises IOError.
|
242
|
+
def read(*args)
|
243
|
+
raise(IOError, "Stream not opened for reading")
|
244
|
+
end
|
245
|
+
|
246
|
+
# Overridden in StreamWriter to be like IO#write.
|
247
|
+
# This abstract implementation only raises IOError.
|
248
|
+
def write(*args)
|
249
|
+
raise(IOError, "Stream not opened for writing")
|
250
|
+
end
|
251
|
+
|
252
|
+
# Returns the position in the *decompressed* data (regardless of
|
253
|
+
# whether this is a reader or a writer instance).
|
254
|
+
def pos
|
255
|
+
@pos
|
256
|
+
end
|
257
|
+
alias tell pos
|
258
|
+
|
259
|
+
# Like IO#set_encoding.
|
260
|
+
def set_encoding(*args)
|
261
|
+
if args.count < 1 || args.count > 3
|
262
|
+
raise ArgumentError, "Wrong number of arguments: Expected 1-3, got #{args.count}"
|
263
|
+
end
|
264
|
+
|
265
|
+
# Clean `args' to [external_encoding, internal_encoding],
|
266
|
+
# and @transcode_options.
|
267
|
+
return set_encoding($`, $', *args[1..-1]) if args[0].respond_to?(:to_str) && args[0].to_str =~ /:/
|
268
|
+
@transcode_options = args.delete_at(-1) if args[-1].kind_of?(Hash)
|
269
|
+
|
270
|
+
# `args' is always [external, internal] or [external] at this point
|
271
|
+
@external_encoding = args[0].kind_of?(Encoding) ? args[0] : Encoding.find(args[0])
|
272
|
+
if args[1]
|
273
|
+
@internal_encoding = args[1].kind_of?(Encoding) ? args[1] : Encoding.find(args[1])
|
274
|
+
else
|
275
|
+
@internal_encoding = Encoding.default_internal # Encoding.default_internal defaults to nil
|
276
|
+
end
|
277
|
+
|
278
|
+
self
|
279
|
+
end
|
280
|
+
|
281
|
+
# Do not define #pos= and #seek, not even to throw NotImplementedError.
|
282
|
+
# Reason: The minitar gem thinks it can use these methods then and provokes
|
283
|
+
# the NotImplementedError exception.
|
284
|
+
|
285
|
+
# Like IO#<<.
|
286
|
+
def <<(obj)
|
287
|
+
write(obj.to_s)
|
288
|
+
end
|
289
|
+
|
290
|
+
# Like IO#advise. No-op, because not meaningful on compressed data.
|
291
|
+
def advise
|
292
|
+
nil
|
293
|
+
end
|
294
|
+
|
295
|
+
# Like IO#getbyte. Note this method isn't exactly performant,
|
296
|
+
# because it actually reads compressed data as a string and then
|
297
|
+
# needs to figure out the bytes from that again.
|
298
|
+
def getbyte
|
299
|
+
return nil if eof?
|
300
|
+
read(1).bytes.first
|
301
|
+
end
|
302
|
+
|
303
|
+
# Like IO#readbyte.
|
304
|
+
def readbyte
|
305
|
+
getbyte || raise(EOFError, "End of stream reached")
|
306
|
+
end
|
307
|
+
|
308
|
+
# Like IO#getc.
|
309
|
+
def getc
|
310
|
+
str = String.new
|
311
|
+
|
312
|
+
# Read byte-by-byte until a valid character in the external
|
313
|
+
# encoding was built.
|
314
|
+
loop do
|
315
|
+
str.force_encoding(Encoding::BINARY)
|
316
|
+
str << read(1)
|
317
|
+
str.force_encoding(@external_encoding)
|
318
|
+
|
319
|
+
break if str.valid_encoding? || eof?
|
320
|
+
end
|
321
|
+
|
322
|
+
# Transcode to internal encoding if one was requested
|
323
|
+
if @internal_encoding
|
324
|
+
str.encode(@internal_encoding)
|
325
|
+
else
|
326
|
+
str
|
327
|
+
end
|
328
|
+
end
|
329
|
+
|
330
|
+
# Like IO#readchar.
|
331
|
+
def readchar
|
332
|
+
getc || raise(EOFError, "End of stream reached")
|
333
|
+
end
|
334
|
+
|
335
|
+
# Like IO#gets.
|
336
|
+
def gets(separator = $/, limit = nil)
|
337
|
+
return nil if eof?
|
338
|
+
@lineno += 1
|
339
|
+
|
340
|
+
# Mirror IO#gets' weird call-seq
|
341
|
+
if separator.respond_to?(:to_int)
|
342
|
+
limit = separator.to_int
|
343
|
+
separator = $/
|
344
|
+
end
|
345
|
+
|
346
|
+
buf = String.new
|
347
|
+
buf.force_encoding(target_encoding)
|
348
|
+
until eof? || (limit && buf.length >= limit)
|
349
|
+
buf << getc
|
350
|
+
return buf if buf[-1] == separator
|
351
|
+
end
|
352
|
+
|
353
|
+
buf
|
354
|
+
end
|
355
|
+
|
356
|
+
# Like IO#readline.
|
357
|
+
def readline(*args)
|
358
|
+
gets(*args) || raise(EOFError, "End of stream reached")
|
359
|
+
end
|
360
|
+
|
361
|
+
# Like IO#each.
|
362
|
+
def each(*args)
|
363
|
+
return enum_for __method__ unless block_given?
|
364
|
+
|
365
|
+
while line = gets(*args)
|
366
|
+
yield(line)
|
367
|
+
end
|
368
|
+
end
|
369
|
+
alias each_line each
|
370
|
+
|
371
|
+
# Like IO#each_byte.
|
372
|
+
def each_byte
|
373
|
+
return enum_for __method__ unless block_given?
|
374
|
+
|
375
|
+
while byte = getbyte
|
376
|
+
yield(byte)
|
377
|
+
end
|
378
|
+
end
|
379
|
+
|
380
|
+
# Like IO#each_char.
|
381
|
+
def each_char
|
382
|
+
return enum_for __method__ unless block_given?
|
383
|
+
|
384
|
+
while char = getc
|
385
|
+
yield(char)
|
386
|
+
end
|
387
|
+
end
|
388
|
+
|
389
|
+
# Like IO#each_codepoint.
|
390
|
+
def each_codepoint
|
391
|
+
return enum_for __method__ unless block_given?
|
392
|
+
|
393
|
+
each_char{|c| yield(c.ord)}
|
394
|
+
end
|
395
|
+
|
396
|
+
# Like IO#printf.
|
397
|
+
def printf(*args)
|
398
|
+
write(sprintf(*args))
|
399
|
+
nil
|
400
|
+
end
|
401
|
+
|
402
|
+
# Like IO#putc.
|
403
|
+
def putc(obj)
|
404
|
+
if obj.respond_to? :chr
|
405
|
+
write(obj.chr)
|
406
|
+
elsif obj.respond_to? :to_str
|
407
|
+
write(obj.to_str)
|
408
|
+
else
|
409
|
+
raise(TypeError, "Can only #putc strings and numbers")
|
410
|
+
end
|
411
|
+
end
|
412
|
+
|
413
|
+
def puts(*objs)
|
414
|
+
if objs.empty?
|
415
|
+
write("\n")
|
416
|
+
return nil
|
417
|
+
end
|
418
|
+
|
419
|
+
objs.each do |obj|
|
420
|
+
if obj.respond_to? :to_ary
|
421
|
+
puts(*obj.to_ary)
|
422
|
+
else
|
423
|
+
# Don't squeeze multiple subsequent trailing newlines in `obj'
|
424
|
+
obj = obj.to_s
|
425
|
+
if obj.end_with?("\n".encode(obj.encoding))
|
426
|
+
write(obj)
|
427
|
+
else
|
428
|
+
write(obj + "\n".encode(obj.encoding))
|
429
|
+
end
|
430
|
+
end
|
431
|
+
end
|
432
|
+
nil
|
433
|
+
end
|
434
|
+
|
435
|
+
# Like IO#print.
|
436
|
+
def print(*objs)
|
437
|
+
if objs.empty?
|
438
|
+
write($_)
|
439
|
+
else
|
440
|
+
objs.each do |obj|
|
441
|
+
write(obj.to_s)
|
442
|
+
write($,) if $,
|
443
|
+
end
|
444
|
+
end
|
445
|
+
|
446
|
+
write($\) if $\
|
447
|
+
nil
|
448
|
+
end
|
46
449
|
|
47
|
-
#
|
48
|
-
#
|
49
|
-
def
|
50
|
-
|
51
|
-
@lzma_stream = XZ::LZMAStream.new
|
450
|
+
# It is not possible to reopen an lzma stream, hence this
|
451
|
+
# method always raises NotImplementedError.
|
452
|
+
def reopen(*args)
|
453
|
+
raise(NotImplementedError, "Can't reopen an lzma stream")
|
52
454
|
end
|
53
455
|
|
54
456
|
private
|
55
457
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
#I benchmarked it, and it is as twice as fast.
|
60
|
-
if str.respond_to? :force_encoding
|
61
|
-
str.dup.force_encoding(Encoding::BINARY).size
|
458
|
+
def target_encoding
|
459
|
+
if @internal_encoding
|
460
|
+
@internal_encoding
|
62
461
|
else
|
63
|
-
|
462
|
+
@external_encoding
|
64
463
|
end
|
65
464
|
end
|
66
465
|
|