ruby-xz 0.2.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/xz/stream.rb CHANGED
@@ -1,9 +1,10 @@
1
1
  # -*- coding: utf-8 -*-
2
- # (The MIT license)
3
- #
2
+ #--
4
3
  # Basic liblzma-bindings for Ruby.
5
4
  #
6
- # Copyright © 2012 Marvin Gülker
5
+ # Copyright © 2011-2018 Marvin Gülker et al.
6
+ #
7
+ # See AUTHORS for the full list of contributors.
7
8
  #
8
9
  # Permission is hereby granted, free of charge, to any person obtaining a
9
10
  # copy of this software and associated documentation files (the ‘Software’),
@@ -22,45 +23,443 @@
22
23
  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
24
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24
25
  # THE SOFTWARE.
26
+ #++
25
27
 
26
- #The base class for XZ::StreamReader and XZ::StreamWriter.
27
- #This is an abstract class that is not meant to be used
28
- #directly; if you try, you will soon recognise that you’ve
29
- #created a quite limited object ;-). You can, however, test
30
- #against this class in <tt>kind_of?</tt> tests.
28
+ # The base class for XZ::StreamReader and XZ::StreamWriter. This is
29
+ # an abstract class that is not meant to be used directly. You can,
30
+ # however, test against this class in <tt>kind_of?</tt> tests.
31
+ #
32
+ # XZ::StreamReader and XZ::StreamWriter are IO-like classes that allow
33
+ # you to access XZ-compressed data the same way you access an
34
+ # IO-object, easily allowing to fool other libraries that expect IO
35
+ # objects. The most noticeable example for this may be reading and
36
+ # writing XZ-compressed tarballs using the minitar
37
+ # RubyGem; see the README.md file for an example.
38
+ #
39
+ # Most of IO's methods are implemented in this class or one of the
40
+ # subclasses. The most notable exception is that it is not possible
41
+ # to seek in XZ archives (#seek and #pos= are not defined).
42
+ # Many methods that are not expressly documented in the RDoc
43
+ # still exist; this class uses Ruby's Forwardable module to forward
44
+ # them to the underlying IO object.
31
45
  #
32
- #XZ::StreamReader and XZ::StreamWriter are IO-like classes that
33
- #allow you to access XZ-compressed data the same way you access
34
- #an IO-object, easily allowing to fool other libraries that expect
35
- #IO objects. The most noticable example for this may be reading
36
- #and writing XZ-compressed tarballs; see XZ::StreamReader and
37
- #XZ::StreamWriter for respective examples.
46
+ # Stream and its subclasses honour Ruby's external+internal encoding
47
+ # system just like Ruby's own IO does. All of what the Ruby docs say
48
+ # about external and internal encodings applies to this class with one
49
+ # important difference. The "external encoding" does not refer to the
50
+ # encoding of the file on the hard disk (this file is always a binary
51
+ # file as it's compressed data), but to the encoding of the
52
+ # decompressed data inside the compressed file.
38
53
  #
39
- #Neither this class nor its subclasses document the IO-methods
40
- #they contain--this is due to the reason that they include the
41
- #great IO::Like module that provides all the necessary IO methods
42
- #based on a few methods you define. For all defined IO methods,
43
- #see the +io-like+ gem’s documentation.
54
+ # As with Ruby's IO class, instances of this class and its subclasses
55
+ # default their external encoding to Encoding.default_external and
56
+ # their internal encoding to Encoding.default_internal. You can use
57
+ # #set_encoding or pass appropriate arguments to the +new+ method to
58
+ # change these encodings per-instance.
44
59
  class XZ::Stream
45
- include IO::Like
60
extend Forwardable

# The methods listed below are forwarded unchanged to the wrapped IO
# object held in @delegate_io.
#
# TODO: liblzma might have its own flush method that should be used
# instead of delegating #flush.
# TODO: use liblzma's own syncing functionality for #sync and #sync=?
#
# #stat is intentionally NOT delegated: if it is available the minitar
# gem thinks it's a File and wants to seek it.
def_delegators :@delegate_io, *%i[
  autoclose= autoclose? binmode binmode? close_on_exec= close_on_exec?
  fcntl fdatasync fileno to_i flush fsync ioctl isatty pid sync sync= tty?
]
81
+
82
+ # Like IO#lineno and IO#lineno=.
83
+ attr_accessor :lineno
84
+
85
+ # Returns the encoding used inside the compressed data stream.
86
+ # Like IO#external_encoding.
87
+ attr_reader :external_encoding
88
+
89
+ # When compressed data is read, the decompressed data is transcoded
90
+ # from the external_encoding to this encoding. If this encoding is
91
+ # nil, no transcoding happens.
92
+ attr_reader :internal_encoding
93
+
94
# Private API only for use by subclasses.
#
# Remembers +delegate_io+ as the wrapped IO object, allocates and
# initialises a fresh lzma stream structure, resets the bookkeeping
# state (position, line number, finished flag), takes the encodings
# from Ruby's global defaults and allocates the two chunk-sized
# native buffers that #lzma_code reads from and writes to.
def initialize(delegate_io) # :nodoc:
  @delegate_io = delegate_io
  @lzma_stream = XZ::LibLZMA::LZMAStream.malloc
  XZ::LibLZMA::LZMA_STREAM_INIT(@lzma_stream)

  @finished          = false
  @lineno            = 0
  @pos               = 0
  @external_encoding = Encoding.default_external
  @internal_encoding = Encoding.default_internal
  @transcode_options = {}

  # Fiddle only releases malloc'ed memory when the pointer carries a
  # free function. Without one, both buffers would stay allocated
  # after this object has been garbage-collected.
  @input_buffer_p  = Fiddle::Pointer.malloc(XZ::CHUNK_SIZE, Fiddle::RUBY_FREE)
  @output_buffer_p = Fiddle::Pointer.malloc(XZ::CHUNK_SIZE, Fiddle::RUBY_FREE)
end
109
+
110
# Feed +str+ through liblzma's lzma_code() function, one slice of at
# most XZ::CHUNK_SIZE bytes at a time. +action+ is either
# LibLZMA::LZMA_RUN (still working) or LibLZMA::LZMA_FINISH (this is
# the last piece).
#
# Every piece of output liblzma produces is yielded to the block.
# +str+ is temporarily switched to binary encoding; its previous
# encoding is restored before the method returns or raises.
def lzma_code(str, action) # :nodoc:
  saved_encoding = str.encoding
  str.force_encoding(Encoding::BINARY) # slice by bytes, not characters

  begin
    offset = 0
    # Deliberately "<=" and not "<": an empty +str+, or one whose size
    # is an exact multiple of the chunk size, still gets one final
    # pass with zero input bytes. The original author notes that
    # stopping earlier conflicts with #lzma_finish.
    while offset <= str.bytesize
      chunk      = str[offset, XZ::CHUNK_SIZE]
      chunk_size = chunk.bytesize
      offset    += XZ::CHUNK_SIZE

      @input_buffer_p[0, chunk_size] = chunk
      @lzma_stream.next_in  = @input_buffer_p
      @lzma_stream.avail_in = chunk_size

      # Keep calling liblzma for as long as it fills the whole output
      # buffer, as that means there may be more output pending.
      loop do
        @lzma_stream.next_out  = @output_buffer_p
        @lzma_stream.avail_out = XZ::CHUNK_SIZE
        retval = XZ::LibLZMA.lzma_code(@lzma_stream.to_ptr, action)
        XZ.send(:check_lzma_code_retval, retval) # package-private method

        produced = XZ::CHUNK_SIZE - @lzma_stream.avail_out
        yield(@output_buffer_p[0, produced])

        break if @lzma_stream.avail_out != 0
      end
    end
  ensure
    str.force_encoding(saved_encoding)
  end
end
143
+
144
# Shared part of +rewind+; subclasses implement the rest.
#
# Finishes the current lzma stream, rewinds the delegate IO, resets
# position, line number and the finished flag, and sets up a new lzma
# stream (which subclasses still have to configure). Returns 0, as
# IO#rewind does.
def rewind # :nodoc:
  # Order matters: the old stream is freed and the delegate IO is
  # rewound *before* a new stream is allocated. If the delegate's
  # #rewind raises (not rewindable), no freshly allocated lzma stream
  # is left behind unfreed.
  finish
  @delegate_io.rewind

  # Back to the initial bookkeeping state.
  @pos      = 0
  @lineno   = 0
  @finished = false

  # Fresh lzma stream; configuration is up to the subclass.
  @lzma_stream = XZ::LibLZMA::LZMAStream.malloc
  XZ::LibLZMA::LZMA_STREAM_INIT(@lzma_stream)

  0
end
165
+
166
# Instances of the subclasses can mostly be treated like IO objects
# (this class itself is abstract and shouldn't be used directly),
# so no conversion is needed here.
#
# Returns the receiver.
def to_io
  itself
end
174
+
175
# Abstract stand-in for IO#eof?; StreamReader overrides it.
# This implementation does nothing but raise IOError.
def eof?
  raise IOError, "Stream not opened for reading"
end
180
+
181
# Same as #eof?. Dispatches at call time, so a subclass's #eof?
# override is picked up.
def eof
  at_end = eof?
  at_end
end
185
+
186
# Tells whether the wrapped (delegate) IO object has been closed.
def closed?
  io = @delegate_io
  io.closed?
end
190
+
191
# Returns the finished flag, which #finish sets once liblzma's
# internal memory has been freed. For writer instances a true value
# additionally means that all of liblzma's compressed data has been
# flushed to the underlying IO object.
def finished?
  done = @finished
  done
end
198
+
199
# Free internal liblzma memory. This needs to be called before
# you leave this object for the GC. If you used a block-form
# initializer, this is done automatically for you.
#
# Subsequent calls to #read or #write will cause an IOError.
#
# Returns the underlying IO object. This allows you to retrieve
# the File instance that was automatically created when using
# the +open+ method's block form. The IO object is returned on
# every call, including calls made after the stream has already
# been finished (those calls free nothing a second time).
def finish
  # Already finished: nothing left to free, but still hand back the
  # delegate IO as documented instead of nil.
  return @delegate_io if @finished

  # Clean up the lzma_stream structure's internal memory.
  # This would belong into a destructor if Ruby had that.
  XZ::LibLZMA.lzma_end(@lzma_stream)
  @finished = true

  @delegate_io
end
218
+
219
+
220
# Calls #finish unless that already happened, then closes the
# delegate IO unless it is closed already. Closing makes the
# delegate IO flush its buffer, so once this method returns all
# pending data has been handed to the OS' kernel.
#
# Returns nil.
def close
  finish if !@finished

  io = @delegate_io
  io.close unless io.closed?

  nil
end
229
+
230
# XZ streams can never be duplex, so there is no read half to
# close: this method always raises IOError.
def close_read
  raise IOError, "Not a duplex I/O stream"
end
234
+
235
# XZ streams can never be duplex, so there is no write half to
# close: this method always raises IOError.
def close_write
  raise IOError, "Not a duplex I/O stream"
end
239
+
240
# Abstract stand-in for IO#read; StreamReader overrides it.
# Whatever arguments are given, this implementation only raises
# IOError.
def read(*args)
  raise IOError, "Stream not opened for reading"
end
245
+
246
# Abstract stand-in for IO#write; StreamWriter overrides it.
# Whatever arguments are given, this implementation only raises
# IOError.
def write(*args)
  raise IOError, "Stream not opened for writing"
end
251
+
252
# Returns the current position counter. It refers to the
# *decompressed* data, no matter whether this is a reader or a
# writer instance. Also available as #tell.
def pos
  position = @pos
  position
end
alias tell pos
258
+
259
# Like IO#set_encoding.
#
# Accepts one to three arguments: an external encoding, optionally
# an internal encoding, optionally a trailing Hash of transcoding
# options. Encodings may be Encoding objects or names, and the first
# argument may also be a combined "external:internal" string.
# Without an internal encoding, Encoding.default_internal is used.
#
# Raises ArgumentError for any other number of arguments.
# Returns the receiver.
def set_encoding(*args)
  unless (1..3).cover?(args.count)
    raise ArgumentError, "Wrong number of arguments: Expected 1-3, got #{args.count}"
  end

  # "ext:int" notation: split at the first colon and start over with
  # both halves as separate arguments.
  first = args[0]
  if first.respond_to?(:to_str) && first.to_str =~ /:/
    return set_encoding($`, $', *args[1..-1])
  end

  # A trailing Hash holds the transcoding options.
  @transcode_options = args.pop if args.last.kind_of?(Hash)

  # What is left is [external] or [external, internal].
  external, internal = args
  @external_encoding = external.kind_of?(Encoding) ? external : Encoding.find(external)
  @internal_encoding =
    if internal
      internal.kind_of?(Encoding) ? internal : Encoding.find(internal)
    else
      Encoding.default_internal # which itself defaults to nil
    end

  self
end
280
+
281
+ # Do not define #pos= and #seek, not even to throw NotImplementedError.
282
+ # Reason: The minitar gem thinks it can use these methods then and provokes
283
+ # the NotImplementedError exception.
284
+
285
# Like IO#<<. Writes <tt>obj.to_s</tt> via #write.
#
# Returns the receiver, as IO#<< does, so that calls can be
# chained: <tt>stream << "a" << "b"</tt>.
def <<(obj)
  write(obj.to_s)
  self
end
289
+
290
# Like IO#advise. No-op, because not meaningful on compressed data.
#
# Accepts and ignores any arguments, so that calls written for
# IO#advise (advice, offset, length) do not fail with ArgumentError.
# Always returns nil.
def advise(*_args)
  nil
end
294
+
295
# Like IO#getbyte. Returns nil when #eof? is true.
#
# Not exactly performant: it fetches a one-byte string via #read
# and then extracts the byte value from that string again.
def getbyte
  return nil if eof?

  chunk = read(1)
  chunk.bytes[0]
end
302
+
303
# Like IO#readbyte: the same as #getbyte, except that EOFError is
# raised when #getbyte has nothing to return.
def readbyte
  byte = getbyte
  raise(EOFError, "End of stream reached") unless byte

  byte
end
307
+
308
# Like IO#getc.
#
# Reads one character in the external encoding and returns it,
# transcoded to the internal encoding if one is set. Returns nil at
# the end of the stream, which #readchar and #each_char rely on.
def getc
  return nil if eof?

  str = String.new

  # Read byte-by-byte until a valid character in the external
  # encoding was built.
  loop do
    byte = read(1)
    break if byte.nil? # stream ended in the middle of a character

    str.force_encoding(Encoding::BINARY)
    str << byte
    str.force_encoding(@external_encoding)

    break if str.valid_encoding? || eof?
  end

  # #read delivered nothing at all, so this is the end of the stream.
  return nil if str.empty?

  # Transcode to internal encoding if one was requested
  if @internal_encoding
    str.encode(@internal_encoding)
  else
    str
  end
end
329
+
330
# Like IO#readchar: the same as #getc, except that EOFError is
# raised when #getc has nothing to return.
def readchar
  char = getc
  raise(EOFError, "End of stream reached") unless char

  char
end
334
+
335
# Like IO#gets.
#
# Reads characters via #getc until +separator+ has been read, until
# +limit+ characters have been collected, or until the end of the
# stream. As with IO#gets, a single Integer argument is taken as the
# limit. Separators of any length (e.g. "\r\n") are recognised; a
# nil or empty separator never matches, so reading then stops only
# at the limit or at the end of the stream.
#
# Returns the line including its separator, or nil at the end of
# the stream. Increments #lineno for every line returned.
def gets(separator = $/, limit = nil)
  return nil if eof?
  @lineno += 1

  # Mirror IO#gets' weird call-seq
  if separator.respond_to?(:to_int)
    limit = separator.to_int
    separator = $/
  end

  buf = String.new
  buf.force_encoding(target_encoding)

  # Express the separator in the buffer's encoding, so that it can
  # also match when that encoding is not ASCII-compatible. If it
  # cannot be converted, compare with it as given.
  terminator = nil
  if separator.kind_of?(String) && !separator.empty?
    terminator =
      begin
        separator.encode(buf.encoding)
      rescue EncodingError
        separator
      end
  end
  term_length = terminator ? terminator.length : 0

  until eof? || (limit && buf.length >= limit)
    buf << getc
    # Compare the whole tail of the buffer, not just its last
    # character, so multi-character separators end the line too.
    return buf if term_length > 0 && buf[-term_length, term_length] == terminator
  end

  buf
end
355
+
356
# Like IO#readline: passes all arguments on to #gets and raises
# EOFError when #gets has nothing to return.
def readline(*args)
  line = gets(*args)
  raise(EOFError, "End of stream reached") unless line

  line
end
360
+
361
# Like IO#each. Also available as #each_line.
#
# Yields every line obtained from #gets, passing +args+ (separator
# and/or limit) on to it. Without a block an Enumerator is returned
# that uses the same +args+. With a block, returns the receiver.
def each(*args)
  # Hand the arguments to the enumerator as well; otherwise it would
  # silently iterate with the default separator.
  return enum_for(__method__, *args) unless block_given?

  while (line = gets(*args))
    yield(line)
  end

  self
end
alias each_line each
370
+
371
# Like IO#each_byte. Yields the bytes returned by #getbyte until it
# returns nil. Without a block, an Enumerator is returned.
def each_byte
  return enum_for(__method__) unless block_given?

  byte = getbyte
  while byte
    yield(byte)
    byte = getbyte
  end
end
379
+
380
# Like IO#each_char. Yields the characters returned by #getc until
# it returns nil. Without a block, an Enumerator is returned.
def each_char
  return enum_for(__method__) unless block_given?

  char = getc
  while char
    yield(char)
    char = getc
  end
end
388
+
389
# Like IO#each_codepoint. Yields the codepoint (String#ord) of
# every character delivered by #each_char. Without a block, an
# Enumerator is returned.
def each_codepoint
  return enum_for(__method__) unless block_given?

  each_char do |char|
    yield(char.ord)
  end
end
395
+
396
# Like IO#printf. Formats +args+ the way Kernel#sprintf does and
# passes the resulting string to #write. Returns nil.
def printf(*args)
  formatted = format(*args)
  write(formatted)
  nil
end
401
+
402
# Like IO#putc. Objects responding to +chr+ are written as
# <tt>obj.chr</tt>, other objects responding to +to_str+ as
# <tt>obj.to_str</tt>. Anything else raises TypeError.
#
# Returns whatever #write returns.
def putc(obj)
  text =
    if obj.respond_to?(:chr)
      obj.chr
    elsif obj.respond_to?(:to_str)
      obj.to_str
    else
      raise(TypeError, "Can only #putc strings and numbers")
    end

  write(text)
end
412
+
413
# Like IO#puts. Writes each object's string form followed by a
# newline, unless that string already ends with one. Objects that
# respond to +to_ary+ are expanded recursively. Without arguments a
# lone newline is written. Returns nil.
def puts(*objs)
  if objs.empty?
    write("\n")
    return nil
  end

  objs.each do |obj|
    if obj.respond_to?(:to_ary)
      puts(*obj.to_ary)
      next
    end

    line    = obj.to_s
    newline = "\n".encode(line.encoding)
    # Only a missing newline is added; trailing newlines that are
    # already there are written as they are, not squeezed.
    line += newline unless line.end_with?(newline)
    write(line)
  end

  nil
end
434
+
435
+ # Like IO#print.
436
+ def print(*objs)
437
+ if objs.empty?
438
+ write($_)
439
+ else
440
+ objs.each do |obj|
441
+ write(obj.to_s)
442
+ write($,) if $,
443
+ end
444
+ end
445
+
446
+ write($\) if $\
447
+ nil
448
+ end
46
449
 
47
- #Creates a new instance of this class. Don’t use this directly,
48
- #it’s only called by subclasses’ ::new methods.
49
- def initialize(delegate_io)
50
- @delegate_io = delegate_io
51
- @lzma_stream = XZ::LZMAStream.new
450
# An lzma stream cannot be reopened. Whatever arguments are given,
# this method always raises NotImplementedError.
def reopen(*args)
  raise NotImplementedError, "Can't reopen an lzma stream"
end
53
455
 
54
456
  private
55
457
 
56
- #This method returns the size of +str+ in bytes.
57
- def binary_size(str)
58
- #Believe it or not, but this is faster than str.bytes.to_a.size.
59
- #I benchmarked it, and it is as twice as fast.
60
- if str.respond_to? :force_encoding
61
- str.dup.force_encoding(Encoding::BINARY).size
458
# The encoding that strings handed out by this stream end up in:
# the internal encoding if one is set, the external encoding
# otherwise.
def target_encoding
  @internal_encoding || @external_encoding
end
66
465