ruby-xz 0.2.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,10 @@
1
1
  # -*- coding: utf-8 -*-
2
- # (The MIT license)
3
- #
2
+ #--
4
3
  # Basic liblzma-bindings for Ruby.
5
4
  #
6
- # Copyright © 2012 Marvin Gülker
5
+ # Copyright © 2011-2018 Marvin Gülker et al.
6
+ #
7
+ # See AUTHORS for the full list of contributors.
7
8
  #
8
9
  # Permission is hereby granted, free of charge, to any person obtaining a
9
10
  # copy of this software and associated documentation files (the ‘Software’),
@@ -22,264 +23,290 @@
22
23
  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
24
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24
25
  # THE SOFTWARE.
26
+ #++
25
27
 
26
- #An IO-like reader class for XZ-compressed data, allowing you to
27
- #access XZ-compressed data as if it was a normal IO object, but
28
- #please note you can’t seek in the data--this doesn’t make much
29
- #sense anyway. Where would you want to seek? The plain or the XZ
30
- #data?
31
- #
32
- #A StreamReader object actually wraps another IO object it reads
33
- #the compressed data from; you can either pass this IO object directly
34
- #to the ::new method, effectively allowing you to pass any IO-like thing
35
- #you can imagine (just ensure it is readable), or you can pass a path
36
- #to a filename to ::new, in which case StreamReader takes care of both
37
- #opening and closing the file correctly. You can even take it one step
38
- #further and use the block form of ::new which will automatically call
39
- #the #close method for you after the block finished. However, if you pass
40
- #an IO, remember you have to close:
41
- #
42
- #1. The StreamReader instance.
43
- #2. The IO object you passed to ::new.
44
- #
45
- #Do it <b>in exactly that order</b>, otherwise you may lose data.
28
+ # An IO-like reader class for XZ-compressed data, allowing you to
29
+ # access XZ-compressed data as if it was a normal IO object, but
30
+ # please note you can’t seek in the data--this doesn’t make much
31
+ # sense anyway. Where would you want to seek? The plain or the XZ
32
+ # data?
46
33
  #
47
- #See the +io-like+ gem’s documentation for the IO-reading methods
48
- #available for this class (although you’re probably familiar with
49
- #them through Ruby’s own IO class ;-)).
50
- #
51
- #==Example
52
- #In this example, we’re going to use ruby-xz together with the
53
- #+archive-tar-minitar+ gem that allows to read tarballs. Used
54
- #together, the two libraries allow us to read XZ-compressed tarballs.
55
- #
56
- # require "xz"
57
- # require "archive/tar/minitar"
58
- #
59
- # XZ::StreamReader.open("foo.tar.xz") do |txz|
60
- # # This automatically closes txz
61
- # Archive::Tar::Minitar.unpack(txz, "foo")
62
- # end
34
+ # A StreamReader object actually wraps another IO object it reads
35
+ # the compressed data from; you can either pass this IO object directly
36
+ # to the ::new method, effectively allowing you to pass any IO-like thing
37
+ # you can imagine (just ensure it is readable), or you can pass a path
38
+ # to a file to ::open, in which case StreamReader will open the path
39
+ # using Ruby's File class internally. If you use ::open's block form,
40
+ # the method will take care of properly closing both the liblzma
41
+ # stream and the File instance.
63
42
  class XZ::StreamReader < XZ::Stream
64
43
 
65
- #The memory limit you set for this reader (in ::new).
44
+ # The memory limit configured for this lzma decoder.
66
45
  attr_reader :memory_limit
67
- #The flags you set for this reader (in ::new).
68
- attr_reader :flags
69
46
 
70
- #call-seq:
71
- # new(delegate, memory_limit = XZ::LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check]) a_stream_reader
72
- # open(delegate, memory_limit = XZ::LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check]) → a_stream_reader
47
+ # call-seq:
48
+ # open(filename [, kw]) → stream_reader
49
+ # open(filename [, kw]){|sr| ...} → stream_reader
73
50
  #
74
- #Creates a new StreamReader instance. If you pass an IO,
75
- #remember you have to close *both* the resulting instance
76
- #(via the #close method) and the IO object you pass to flush
77
- #any internal buffers in order to be able to read all decompressed
78
- #data.
79
- #==Parameters
80
- #[delegate] An IO object to read the data from, or a path
81
- # to a file to open. If you’re in an urgent need to
82
- # pass a plain string, use StringIO from Ruby’s
83
- # standard library. If this is an IO, it must be
84
- # opened for reading.
85
- #The other parameters are identical to what the XZ::decompress_stream
86
- #method expects.
87
- #==Return value
88
- #The newly created instance.
89
- #==Example
90
- # # Wrap it around a file
91
- # f = File.open("foo.xz")
92
- # r = XZ::StreamReader.new(f)
51
+ # Open the given file and wrap a new instance around it with ::new.
52
+ # If you use the block form, both the internally created File instance
53
+ # and the liblzma stream will be closed automatically for you.
93
54
  #
94
- # # Ignore any XZ checksums (may result in invalid data being read!)
95
- # File.open("foo.xz") do |f|
96
- # r = XZ::StreamReader.new(f, XZ::LibLZMA::UINT64_MAX, [:tell_no_check]
97
- # end
55
+ # === Parameters
56
+ # [filename]
57
+ # Path to the file to open.
58
+ # [sr (block argument)]
59
+ # The created StreamReader instance.
98
60
  #
99
- # # Let StreamReader handle file closing automatically
100
- # XZ::StreamReader.new("myfile.xz"){|r| r.raed}
101
- def initialize(delegate, memory_limit = XZ::LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check])
102
- raise(ArgumentError, "Invalid memory limit set!") unless (0..XZ::LibLZMA::UINT64_MAX).include?(memory_limit)
103
- flags.each do |flag|
104
- raise(ArgumentError, "Unknown flag #{flag}!") unless [:tell_no_check, :tell_unsupported_check, :tell_any_check, :concatenated].include?(flag)
105
- end
106
-
107
- if delegate.respond_to?(:to_io)
108
- super(delegate)
109
- else
110
- @file = File.open(delegate, "rb")
111
- super(@file)
112
- end
113
-
114
- @memory_limit = memory_limit
115
- @flags = flags
116
-
117
- res = XZ::LibLZMA.lzma_stream_decoder(@lzma_stream,
118
- @memory_limit,
119
- @flags.inject(0){|val, flag| val | XZ::LibLZMA.const_get(:"LZMA_#{flag.to_s.upcase}")})
120
- XZ::LZMAError.raise_if_necessary(res)
121
-
122
- @input_buffer_p = FFI::MemoryPointer.new(XZ::CHUNK_SIZE)
123
-
124
- # These two are only used in #unbuffered read.
125
- @__lzma_finished = false
126
- @__lzma_action = nil
61
+ # See ::new for a description of the keyword parameters.
62
+ #
63
+ # === Return value
64
+ # The newly created instance.
65
+ #
66
+ # === Remarks
67
+ # Starting with version 1.0.0, the block form also returns the newly
68
+ # created instance rather than the block's return value. This is
69
+ # in line with Ruby's own GzipReader.open API.
70
+ #
71
+ # === Example
72
+ # # Normal usage
73
+ # XZ::StreamReader.open("myfile.txt.xz") do |xz|
74
+ # puts xz.read #=> I love Ruby
75
+ # end
76
+ #
77
+ # # If you really need the File instance created internally:
78
+ # file = nil
79
+ # XZ::StreamReader.open("myfile.txt.xz") do |xz|
80
+ # puts xz.read #=> I love Ruby
81
+ # file = xz.finish # prevents closing
82
+ # end
83
+ # file.close # Now close it manually
84
+ #
85
+ # # Or just don't use the block form:
86
+ # xz = XZ::StreamReader.open("myfile.txt.xz")
87
+ # puts xz.read #=> I love Ruby
88
+ # file = xz.finish
89
+ # file.close # Don't forget to close it manually (or use xz.close instead of xz.finish above).
90
+ def self.open(filename, **args)
91
+ file = File.open(filename, "rb")
92
+ reader = new(file, **args)
127
93
 
128
94
  if block_given?
129
95
  begin
130
- yield(self)
96
+ yield(reader)
131
97
  ensure
132
- close unless closed?
98
+ # Close both delegate IO and reader.
99
+ reader.close unless reader.finished?
133
100
  end
134
101
  end
102
+
103
+ reader
135
104
  end
136
- self.class.send(:alias_method, :open, :new)
137
105
 
138
- #Closes this StreamReader instance. Don’t use it afterwards
139
- #anymore.
140
- #==Return value
141
- #The total number of bytes decompressed.
142
- #==Example
143
- # r.close #=> 6468
144
- #==Remarks
145
- #If you passed an IO to ::new, this method doesn’t close it, so
146
- #you have to close it yourself.
147
- def close
148
- super
106
+ # Creates a new instance that is wrapped around the given IO object.
107
+ #
108
+ # === Parameters
109
+ # ==== Positional parameters
110
+ # [delegate_io]
111
+ # The underlying IO object to read the compressed data from.
112
+ # This IO object has to have been opened in binary mode,
113
+ # otherwise you are likely to receive exceptions indicating
114
+ # that the compressed data is corrupt.
115
+ #
116
+ # ==== Keyword arguments
117
+ # [memory_limit (+UINT64_MAX+)]
118
+ # If not XZ::LibLZMA::UINT64_MAX, makes liblzma
119
+ # use no more memory than +memory_limit+ bytes.
120
+ # [flags (<tt>[:tell_unsupported_check]</tt>)]
121
+ # Additional flags passed to liblzma (an array).
122
+ # Possible flags are:
123
+ #
124
+ # [:tell_no_check]
125
+ # Spit out a warning if the archive has no
126
+ # integrity checksum.
127
+ # [:tell_unsupported_check]
128
+ # Spit out a warning if the archive
129
+ # has an unsupported checksum type.
130
+ # [:concatenated]
131
+ # Decompress concatenated archives.
132
+ # [external_encoding (Encoding.default_external)]
133
+ # Assume the decompressed data inside the XZ is encoded in
134
+ # this encoding. Defaults to Encoding.default_external,
135
+ # which in turn defaults to the environment.
136
+ # [internal_encoding (Encoding.default_internal)]
137
+ # Request that the data found in the XZ file (which is assumed
138
+ # to be in the encoding specified by +external_encoding+)
139
+ # be transcoded into this encoding. Defaults to Encoding.default_internal,
140
+ # which defaults to nil, which means to not transcode anything.
141
+ #
142
+ # === Return value
143
+ # The newly created instance.
144
+ #
145
+ # === Remarks
146
+ # The strings returned from the reader will be in the encoding specified
147
+ # by the +internal_encoding+ parameter. If that parameter is nil (default),
148
+ # then they will be in the encoding specified by +external_encoding+.
149
+ #
150
+ # This method used to accept a block in earlier versions. Since version 1.0.0,
151
+ # this behaviour has been removed to synchronise the API with Ruby's own
152
+ # GzipReader.open.
153
+ #
154
+ # This method doesn't close the underlying IO or the liblzma stream.
155
+ # You need to call #finish or #close manually; see ::open for a method
156
+ # that takes a block to automate this.
157
+ #
158
+ # === Example
159
+ # file = File.open("compressed.txt.xz", "rb") # Note binary mode
160
+ # xz = XZ::StreamReader.new(file)
161
+ # puts xz.read #=> I love Ruby
162
+ # xz.close # closes both `xz' and `file'
163
+ #
164
+ # file = File.open("compressed.txt.xz", "rb") # Note binary mode
165
+ # xz = XZ::StreamReader.new(file)
166
+ # puts xz.read #=> I love Ruby
167
+ # xz.finish # closes only `xz'
168
+ # file.close # Now close `file' manually
169
+ def initialize(delegate_io, memory_limit: XZ::LibLZMA::UINT64_MAX, flags: [:tell_unsupported_check], external_encoding: nil, internal_encoding: nil)
170
+ super(delegate_io)
171
+ raise(ArgumentError, "When specifying the internal encoding, the external encoding must also be specified") if internal_encoding && !external_encoding
172
+ raise(ArgumentError, "Memory limit out of range") unless memory_limit > 0 && memory_limit <= XZ::LibLZMA::UINT64_MAX
149
173
 
150
- # Close the XZ stream
151
- res = XZ::LibLZMA.lzma_end(@lzma_stream.pointer)
152
- XZ::LZMAError.raise_if_necessary(res)
174
+ @memory_limit = memory_limit
175
+ @readbuf = String.new
176
+ @readbuf.force_encoding(Encoding::BINARY)
153
177
 
154
- #If we created a File object, close this as well.
155
- @file.close if @file
178
+ if external_encoding
179
+ encargs = []
180
+ encargs << external_encoding
181
+ encargs << internal_encoding if internal_encoding
182
+ set_encoding(*encargs)
183
+ end
184
+
185
+ @allflags = flags.reduce(0) do |val, flag|
186
+ flag = XZ::LibLZMA::LZMA_DECODE_FLAGS[flag] || raise(ArgumentError, "Unknown flag #{flag}")
187
+ val | flag
188
+ end
156
189
 
157
- # Return the number of bytes written in total.
158
- @lzma_stream[:total_out]
190
+ res = XZ::LibLZMA.lzma_stream_decoder(@lzma_stream.to_ptr,
191
+ @memory_limit,
192
+ @allflags)
193
+ XZ::LZMAError.raise_if_necessary(res)
159
194
  end
160
195
 
161
- #call-seq:
162
- # pos() → an_integer
163
- # tell() an_integer
196
+ # Mostly like IO#read. The +length+ parameter refers to the amount
197
+ # of decompressed bytes to read, not the amount of bytes to read
198
+ # from the compressed data. That is, if you request a read of 50
199
+ # bytes, you will receive a string with a maximum length of 50
200
+ # bytes, regardless of how many bytes this was in compressed form.
164
201
  #
165
- #Total number of output bytes provided to you yet.
166
- def pos
167
- @lzma_stream[:total_out]
168
- end
169
- alias tell pos
202
+ # Return values are as per IO#read.
203
+ def read(length = nil, outbuf = String.new)
204
+ return "".force_encoding(Encoding::BINARY) if length == 0 # Shortcut; retval as per IO#read.
170
205
 
171
- #Instrcuts liblzma to immediately stop decompression,
172
- #rewinds the wrapped IO object and reinitalizes the
173
- #StreamReader instance with the same values passed
174
- #originally to the ::new method. The wrapped IO object
175
- #must support the +rewind+ method for this method to
176
- #work; if it doesn’t, this method throws an IOError.
177
- #After the exception was thrown, the StreamReader instance
178
- #is in an unusable state. You cannot continue using it
179
- #(don’t call #close on it either); close the wrapped IO
180
- #stream and create another instance of this class.
181
- #==Raises
182
- #[IOError] The wrapped IO doesn’t support rewinding.
183
- # Do not use the StreamReader instance anymore
184
- # after receiving this exception.
185
- #==Remarks
186
- #I don’t really like this method, it uses several dirty
187
- #tricks to circumvent both io-like’s and liblzma’s control
188
- #mechanisms. I only implemented this because the
189
- #<tt>archive-tar-minitar</tt> gem calls this method when
190
- #unpacking a TAR archive from a stream.
191
- def rewind
192
- # HACK: Wipe all data from io-like’s internal read buffer.
193
- # This heavily relies on io-like’s internal structure.
194
- # Be always sure to test this when a new version of
195
- # io-like is released!
196
- __io_like__internal_read_buffer.clear
206
+ # Note: Querying the underlying IO as early as possible lets
207
+ # Ruby's own IO exceptions bubble up.
208
+ if length
209
+ return nil if eof? # In line with IO#read
210
+ outbuf.force_encoding(Encoding::BINARY) # As per IO#read docs
197
211
 
198
- # Forcibly close the XZ stream (internally frees it!)
199
- res = XZ::LibLZMA.lzma_end(@lzma_stream.pointer)
200
- XZ::LZMAError.raise_if_necessary(res)
212
+ # The user's request is in decompressed bytes, so it doesn't matter
213
+ # how much is actually read from the compressed file.
214
+ if @delegate_io.eof?
215
+ data = ""
216
+ action = XZ::LibLZMA::LZMA_FINISH
217
+ else
218
+ data = @delegate_io.read(XZ::CHUNK_SIZE)
219
+ action = @delegate_io.eof? ? XZ::LibLZMA::LZMA_FINISH : XZ::LibLZMA::LZMA_RUN
220
+ end
201
221
 
202
- # Rewind the wrapped IO
203
- begin
204
- @delegate_io.rewind
205
- rescue => e
206
- raise(IOError, "Delegate IO failed to rewind! Original message: #{e.message}")
207
- end
222
+ lzma_code(data, action) { |decompressed| @readbuf << decompressed }
208
223
 
209
- # Reinitialize everything. Note this doesn’t affect @file as it
210
- # is already set and stays so (we don’t pass a filename here,
211
- # but rather an IO)
212
- initialize(@delegate_io, @memory_limit, @flags)
213
- end
224
+ # If the requested amount has been read, return it.
225
+ # Also return if EOF has been reached. Note that
226
+ # String#slice! will clear the string to an empty one
227
+ # if `length' is greater than the string length.
228
+ # If EOF is not yet reached, try reading and decompressing
229
+ # more data.
230
+ if @readbuf.bytesize >= length || @delegate_io.eof?
231
+ result = @readbuf.slice!(0, length)
232
+ @pos += result.bytesize
233
+ return outbuf.replace(result)
234
+ else
235
+ return read(length, outbuf)
236
+ end
237
+ else
238
+ # Read the entire file and decompress it into memory, returning it.
239
+ while chunk = @delegate_io.read(XZ::CHUNK_SIZE)
240
+ action = @delegate_io.eof? ? XZ::LibLZMA::LZMA_FINISH : XZ::LibLZMA::LZMA_RUN
241
+ lzma_code(chunk, action) { |decompressed| @readbuf << decompressed }
242
+ end
214
243
 
215
- #NO, you CANNOT seek in this object!!
216
- #io-like’s default behaviour is to raise Errno::ESPIPE
217
- #when calling a non-defined seek, which is not what some
218
- #libraries such as RubyGem’s TarReader expect (they expect
219
- #a NoMethodError/NameError instead).
220
- undef seek
244
+ @pos += @readbuf.bytesize
221
245
 
222
- private
246
+ # Apply encoding conversion.
247
+ # First, tag the read data with the external encoding.
248
+ @readbuf.force_encoding(@external_encoding)
223
249
 
224
- #Called by io-like’s read methods such as #read. Does the heavy work
225
- #of feeding liblzma the compressed data and reading the returned
226
- #uncompressed data.
227
- def unbuffered_read(length)
228
- raise(EOFError, "Input data completely processed!") if @__lzma_finished
250
+ # Now, transcode it to the internal encoding if that was requested.
251
+ # Otherwise return it with the external encoding as-is.
252
+ if @internal_encoding
253
+ @readbuf.encode!(@internal_encoding, **@transcode_options)
254
+ outbuf.force_encoding(@internal_encoding)
255
+ else
256
+ outbuf.force_encoding(@external_encoding)
257
+ end
229
258
 
230
- output_buffer_p = FFI::MemoryPointer.new(length) # User guarantees that this fits into RAM
259
+ outbuf.replace(@readbuf)
260
+ @readbuf.clear
261
+ @readbuf.force_encoding(Encoding::BINARY) # Back to binary mode for further reading
231
262
 
232
- @lzma_stream[:next_out] = output_buffer_p
233
- @lzma_stream[:avail_out] = output_buffer_p.size
263
+ return outbuf
264
+ end
265
+ end
234
266
 
235
- loop do
236
- # DON’T overwrite any not yet consumed input from any previous
237
- # run! Instead, wait until the last input data is entirely
238
- # consumed, then provide new data.
239
- # TODO: Theoretically, one could move the remaining data to the
240
- # beginning of the pointer and fill the rest with new data,
241
- # being a tiny bit more performant.
242
- if @lzma_stream[:avail_in].zero?
243
- compressed_data = @delegate_io.read(@input_buffer_p.size) || "" # nil at EOS → ""
244
- @input_buffer_p.write_string(compressed_data)
245
- @lzma_stream[:next_in] = @input_buffer_p
246
- @lzma_stream[:avail_in] = binary_size(compressed_data)
267
+ # Abort the current decompression process and reset everything
268
+ # to the start so that reading from this reader will start over
269
+ # from the beginning of the compressed data.
270
+ #
271
+ # The delegate IO has to support the #rewind method. Otherwise
272
+ # like IO#rewind.
273
+ def rewind
274
+ super
247
275
 
248
- # Now check if we’re at the last bytes of data and set accordingly the
249
- # LZMA-action to carry out (for any subsequent runs until
250
- # all input data has been consumed and the above condition
251
- # is triggered again).
252
- #
253
- # The @__lzma_action variable is only used in this method
254
- # and is _not_ supposed to be accessed from any other method.
255
- if compressed_data.empty?
256
- @__lzma_action = XZ::LibLZMA::LZMA_ACTION[:lzma_finish]
257
- else
258
- @__lzma_action = XZ::LibLZMA::LZMA_ACTION[:lzma_run]
259
- end
260
- end
276
+ @readbuf.clear
277
+ res = XZ::LibLZMA.lzma_stream_decoder(@lzma_stream.to_ptr,
278
+ @memory_limit,
279
+ @allflags)
280
+ XZ::LZMAError.raise_if_necessary(res)
261
281
 
262
- res = XZ::LibLZMA.lzma_code(@lzma_stream.pointer, @__lzma_action)
282
+ 0 # Mimic IO#rewind's return value
283
+ end
263
284
 
264
- # liblzma signals LZMA_BUF_ERROR when the output buffer is
265
- # completely filled, which means we can return now.
266
- # When it signals LZMA_STREAM_END, the buffer won’t be filled
267
- # completely anymore as the whole input data has been consumed.
268
- if res == XZ::LibLZMA::LZMA_RET[:lzma_buf_error]
269
- # @lzma_stream[:avail_out] holds the number of free bytes _behind_
270
- # the produced output!
271
- return output_buffer_p.read_string(output_buffer_p.size - @lzma_stream[:avail_out])
272
- elsif res == XZ::LibLZMA::LZMA_RET[:lzma_stream_end]
273
- # @__lzma_finished is not supposed to be used outside this method!
274
- @__lzma_finished = true
275
- return output_buffer_p.read_string(output_buffer_p.size - @lzma_stream[:avail_out])
276
- else
277
- XZ::LZMAError.raise_if_necessary(res)
278
- end
279
- end #loop
285
+ # Like IO#ungetbyte.
286
+ def ungetbyte(obj)
287
+ if obj.respond_to? :chr
288
+ @readbuf.prepend(obj.chr)
289
+ else
290
+ @readbuf.prepend(obj.to_s)
291
+ end
292
+ end
293
+
294
+ # Like IO#ungetc.
295
+ def ungetc(str)
296
+ @readbuf.prepend(str)
297
+ end
298
+
299
+ # Returns true if:
300
+ #
301
+ # 1. The underlying IO has reached EOF, and
302
+ # 2. liblzma has returned everything it could make out of that.
303
+ def eof?
304
+ @delegate_io.eof? && @readbuf.empty?
305
+ end
280
306
 
281
- rescue XZ::LZMAError => e
282
- raise(SystemCallError, e.message)
307
+ # Human-readable description
308
+ def inspect
309
+ "<#{self.class} pos=#{@pos} bufsize=#{@readbuf.bytesize} finished=#{@finished} closed=#{closed?} io=#{@delegate_io.inspect}>"
283
310
  end
284
311
 
285
312
  end