ruby-xz 0.2.1 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,9 +1,10 @@
1
1
  # -*- coding: utf-8 -*-
2
- # (The MIT license)
3
- #
2
+ #--
4
3
  # Basic liblzma-bindings for Ruby.
5
4
  #
6
- # Copyright © 2012 Marvin Gülker
5
+ # Copyright © 2011-2018 Marvin Gülker et al.
6
+ #
7
+ # See AUTHORS for the full list of contributors.
7
8
  #
8
9
  # Permission is hereby granted, free of charge, to any person obtaining a
9
10
  # copy of this software and associated documentation files (the ‘Software’),
@@ -22,264 +23,290 @@
22
23
  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
24
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24
25
  # THE SOFTWARE.
26
+ #++
25
27
 
26
- #An IO-like reader class for XZ-compressed data, allowing you to
27
- #access XZ-compressed data as if it was a normal IO object, but
28
- #please note you can’t seek in the data--this doesn’t make much
29
- #sense anyway. Where would you want to seek? The plain or the XZ
30
- #data?
31
- #
32
- #A StreamReader object actually wraps another IO object it reads
33
- #the compressed data from; you can either pass this IO object directly
34
- #to the ::new method, effectively allowing you to pass any IO-like thing
35
- #you can imagine (just ensure it is readable), or you can pass a path
36
- #to a filename to ::new, in which case StreamReader takes care of both
37
- #opening and closing the file correctly. You can even take it one step
38
- #further and use the block form of ::new which will automatically call
39
- #the #close method for you after the block finished. However, if you pass
40
- #an IO, remember you have to close:
41
- #
42
- #1. The StreamReader instance.
43
- #2. The IO object you passed to ::new.
44
- #
45
- #Do it <b>in exactly that order</b>, otherwise you may lose data.
28
+ # An IO-like reader class for XZ-compressed data, allowing you to
29
+ # access XZ-compressed data as if it was a normal IO object, but
30
+ # please note you can’t seek in the data--this doesn’t make much
31
+ # sense anyway. Where would you want to seek? The plain or the XZ
32
+ # data?
46
33
  #
47
- #See the +io-like+ gem’s documentation for the IO-reading methods
48
- #available for this class (although you’re probably familiar with
49
- #them through Ruby’s own IO class ;-)).
50
- #
51
- #==Example
52
- #In this example, we’re going to use ruby-xz together with the
53
- #+archive-tar-minitar+ gem that allows to read tarballs. Used
54
- #together, the two libraries allow us to read XZ-compressed tarballs.
55
- #
56
- # require "xz"
57
- # require "archive/tar/minitar"
58
- #
59
- # XZ::StreamReader.open("foo.tar.xz") do |txz|
60
- # # This automatically closes txz
61
- # Archive::Tar::Minitar.unpack(txz, "foo")
62
- # end
34
+ # A StreamReader object actually wraps another IO object it reads
35
+ # the compressed data from; you can either pass this IO object directly
36
+ # to the ::new method, effectively allowing you to pass any IO-like thing
37
+ # you can imagine (just ensure it is readable), or you can pass a path
38
+ # to a file to ::open, in which case StreamReader will open the path
39
+ # using Ruby's File class internally. If you use ::open's block form,
40
+ # the method will take care of properly closing both the liblzma
41
+ # stream and the File instance correctly.
63
42
  class XZ::StreamReader < XZ::Stream
64
43
 
65
- #The memory limit you set for this reader (in ::new).
44
+ # The memory limit configured for this lzma decoder.
66
45
  attr_reader :memory_limit
67
- #The flags you set for this reader (in ::new).
68
- attr_reader :flags
69
46
 
70
- #call-seq:
71
- # new(delegate, memory_limit = XZ::LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check]) → a_stream_reader
72
- # open(delegate, memory_limit = XZ::LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check]) → a_stream_reader
47
+ # call-seq:
48
+ # open(filename [, kw]) → stream_reader
49
+ # open(filename [, kw]){|sr| ...} → stream_reader
73
50
  #
74
- #Creates a new StreamReader instance. If you pass an IO,
75
- #remember you have to close *both* the resulting instance
76
- #(via the #close method) and the IO object you pass to flush
77
- #any internal buffers in order to be able to read all decompressed
78
- #data.
79
- #==Parameters
80
- #[delegate] An IO object to read the data from, or a path
81
- # to a file to open. If you’re in an urgent need to
82
- # pass a plain string, use StringIO from Ruby’s
83
- # standard library. If this is an IO, it must be
84
- # opened for reading.
85
- #The other parameters are identical to what the XZ::decompress_stream
86
- #method expects.
87
- #==Return value
88
- #The newly created instance.
89
- #==Example
90
- # # Wrap it around a file
91
- # f = File.open("foo.xz")
92
- # r = XZ::StreamReader.new(f)
51
+ # Open the given file and wrap a new instance around it with ::new.
52
+ # If you use the block form, both the internally created File instance
53
+ # and the liblzma stream will be closed automatically for you.
93
54
  #
94
- # # Ignore any XZ checksums (may result in invalid data being read!)
95
- # File.open("foo.xz") do |f|
96
- # r = XZ::StreamReader.new(f, XZ::LibLZMA::UINT64_MAX, [:tell_no_check])
97
- # end
55
+ # === Parameters
56
+ # [filename]
57
+ # Path to the file to open.
58
+ # [sr (block argument)]
59
+ # The created StreamReader instance.
98
60
  #
99
- # # Let StreamReader handle file closing automatically
100
- # XZ::StreamReader.new("myfile.xz"){|r| r.read}
101
- def initialize(delegate, memory_limit = XZ::LibLZMA::UINT64_MAX, flags = [:tell_unsupported_check])
102
- raise(ArgumentError, "Invalid memory limit set!") unless (0..XZ::LibLZMA::UINT64_MAX).include?(memory_limit)
103
- flags.each do |flag|
104
- raise(ArgumentError, "Unknown flag #{flag}!") unless [:tell_no_check, :tell_unsupported_check, :tell_any_check, :concatenated].include?(flag)
105
- end
106
-
107
- if delegate.respond_to?(:to_io)
108
- super(delegate)
109
- else
110
- @file = File.open(delegate, "rb")
111
- super(@file)
112
- end
113
-
114
- @memory_limit = memory_limit
115
- @flags = flags
116
-
117
- res = XZ::LibLZMA.lzma_stream_decoder(@lzma_stream,
118
- @memory_limit,
119
- @flags.inject(0){|val, flag| val | XZ::LibLZMA.const_get(:"LZMA_#{flag.to_s.upcase}")})
120
- XZ::LZMAError.raise_if_necessary(res)
121
-
122
- @input_buffer_p = FFI::MemoryPointer.new(XZ::CHUNK_SIZE)
123
-
124
- # These two are only used in #unbuffered read.
125
- @__lzma_finished = false
126
- @__lzma_action = nil
61
+ # See ::new for a description of the keyword parameters.
62
+ #
63
+ # === Return value
64
+ # The newly created instance.
65
+ #
66
+ # === Remarks
67
+ # Starting with version 1.0.0, the block form also returns the newly
68
+ # created instance rather than the block's return value. This is
69
+ # in line with Ruby's own GzipReader.open API.
70
+ #
71
+ # === Example
72
+ # # Normal usage
73
+ # XZ::StreamReader.open("myfile.txt.xz") do |xz|
74
+ # puts xz.read #=> I love Ruby
75
+ # end
76
+ #
77
+ # # If you really need the File instance created internally:
78
+ # file = nil
79
+ # XZ::StreamReader.open("myfile.txt.xz") do |xz|
80
+ # puts xz.read #=> I love Ruby
81
+ # file = xz.finish # prevents closing
82
+ # end
83
+ # file.close # Now close it manually
84
+ #
85
+ # # Or just don't use the block form:
86
+ # xz = XZ::StreamReader.open("myfile.txt.xz")
87
+ # puts xz.read #=> I love Ruby
88
+ # file = xz.finish
89
+ # file.close # Don't forget to close it manually (or use xz.close instead of xz.finish above).
90
+ def self.open(filename, **args)
91
+ file = File.open(filename, "rb")
92
+ reader = new(file, **args)
127
93
 
128
94
  if block_given?
129
95
  begin
130
- yield(self)
96
+ yield(reader)
131
97
  ensure
132
- close unless closed?
98
+ # Close both delegate IO and reader.
99
+ reader.close unless reader.finished?
133
100
  end
134
101
  end
102
+
103
+ reader
135
104
  end
136
- self.class.send(:alias_method, :open, :new)
137
105
 
138
- #Closes this StreamReader instance. Don’t use it afterwards
139
- #anymore.
140
- #==Return value
141
- #The total number of bytes decompressed.
142
- #==Example
143
- # r.close #=> 6468
144
- #==Remarks
145
- #If you passed an IO to ::new, this method doesn’t close it, so
146
- #you have to close it yourself.
147
- def close
148
- super
106
+ # Creates a new instance that is wrapped around the given IO object.
107
+ #
108
+ # === Parameters
109
+ # ==== Positional parameters
110
+ # [delegate_io]
111
+ # The underlying IO object to read the compressed data from.
112
+ # This IO object has to have been opened in binary mode,
113
+ # otherwise you are likely to receive exceptions indicating
114
+ # that the compressed data is corrupt.
115
+ #
116
+ # ==== Keyword arguments
117
+ # [memory_limit (+UINT64_MAX+)]
118
+ # If not XZ::LibLZMA::UINT64_MAX, makes liblzma
119
+ # use no more memory than +memory_limit+ bytes.
120
+ # [flags (<tt>[:tell_unsupported_check]</tt>)]
121
+ # Additional flags passed to liblzma (an array).
122
+ # Possible flags are:
123
+ #
124
+ # [:tell_no_check]
125
+ # Spit out a warning if the archive hasn't an
126
+ # integrity checksum.
127
+ # [:tell_unsupported_check]
128
+ # Spit out a warning if the archive
129
+ # has an unsupported checksum type.
130
+ # [:concatenated]
131
+ # Decompress concatenated archives.
132
+ # [external_encoding (Encoding.default_external)]
133
+ # Assume the decompressed data inside the XZ is encoded in
134
+ # this encoding. Defaults to Encoding.default_external,
135
+ # which in turn defaults to the environment.
136
+ # [internal_encoding (Encoding.default_internal)]
137
+ # Request that the data found in the XZ file (which is assumed
138
+ # to be in the encoding specified by +external_encoding+) to
139
+ # be transcoded into this encoding. Defaults to Encoding.default_internal,
140
+ # which defaults to nil, which means to not transcode anything.
141
+ #
142
+ # === Return value
143
+ # The newly created instance.
144
+ #
145
+ # === Remarks
146
+ # The strings returned from the reader will be in the encoding specified
147
+ # by the +internal_encoding+ parameter. If that parameter is nil (default),
148
+ # then they will be in the encoding specified by +external_encoding+.
149
+ #
150
+ # This method used to accept a block in earlier versions. Since version 1.0.0,
151
+ # this behaviour has been removed to synchronise the API with Ruby's own
152
+ # GzipReader.open.
153
+ #
154
+ # This method doesn't close the underlying IO or the liblzma stream.
155
+ # You need to call #finish or #close manually; see ::open for a method
156
+ # that takes a block to automate this.
157
+ #
158
+ # === Example
159
+ # file = File.open("compressed.txt.xz", "rb") # Note binary mode
160
+ # xz = XZ::StreamReader.new(file)
161
+ # puts xz.read #=> I love Ruby
162
+ # xz.close # closes both `xz' and `file'
163
+ #
164
+ # file = File.open("compressed.txt.xz", "rb") # Note binary mode
165
+ # xz = XZ::StreamReader.new(file)
166
+ # puts xz.read #=> I love Ruby
167
+ # xz.finish # closes only `xz'
168
+ # file.close # Now close `file' manually
169
+ def initialize(delegate_io, memory_limit: XZ::LibLZMA::UINT64_MAX, flags: [:tell_unsupported_check], external_encoding: nil, internal_encoding: nil)
170
+ super(delegate_io)
171
+ raise(ArgumentError, "When specifying the internal encoding, the external encoding must also be specified") if internal_encoding && !external_encoding
172
+ raise(ArgumentError, "Memory limit out of range") unless memory_limit > 0 && memory_limit <= XZ::LibLZMA::UINT64_MAX
149
173
 
150
- # Close the XZ stream
151
- res = XZ::LibLZMA.lzma_end(@lzma_stream.pointer)
152
- XZ::LZMAError.raise_if_necessary(res)
174
+ @memory_limit = memory_limit
175
+ @readbuf = String.new
176
+ @readbuf.force_encoding(Encoding::BINARY)
153
177
 
154
- #If we created a File object, close this as well.
155
- @file.close if @file
178
+ if external_encoding
179
+ encargs = []
180
+ encargs << external_encoding
181
+ encargs << internal_encoding if internal_encoding
182
+ set_encoding(*encargs)
183
+ end
184
+
185
+ @allflags = flags.reduce(0) do |val, flag|
186
+ flag = XZ::LibLZMA::LZMA_DECODE_FLAGS[flag] || raise(ArgumentError, "Unknown flag #{flag}")
187
+ val | flag
188
+ end
156
189
 
157
- # Return the number of bytes written in total.
158
- @lzma_stream[:total_out]
190
+ res = XZ::LibLZMA.lzma_stream_decoder(@lzma_stream.to_ptr,
191
+ @memory_limit,
192
+ @allflags)
193
+ XZ::LZMAError.raise_if_necessary(res)
159
194
  end
160
195
 
161
- #call-seq:
162
- # pos() → an_integer
163
- # tell() → an_integer
196
+ # Mostly like IO#read. The +length+ parameter refers to the amount
197
+ # of decompressed bytes to read, not the amount of bytes to read
198
+ # from the compressed data. That is, if you request a read of 50
199
+ # bytes, you will receive a string with a maximum length of 50
200
+ # bytes, regardless of how many bytes this was in compressed form.
164
201
  #
165
- #Total number of output bytes provided to you yet.
166
- def pos
167
- @lzma_stream[:total_out]
168
- end
169
- alias tell pos
202
+ # Return values are as per IO#read.
203
+ def read(length = nil, outbuf = String.new)
204
+ return "".force_encoding(Encoding::BINARY) if length == 0 # Shortcut; retval as per IO#read.
170
205
 
171
- #Instructs liblzma to immediately stop decompression,
172
- #rewinds the wrapped IO object and reinitializes the
173
- #StreamReader instance with the same values passed
174
- #originally to the ::new method. The wrapped IO object
175
- #must support the +rewind+ method for this method to
176
- #work; if it doesn’t, this method throws an IOError.
177
- #After the exception was thrown, the StreamReader instance
178
- #is in an unusable state. You cannot continue using it
179
- #(don’t call #close on it either); close the wrapped IO
180
- #stream and create another instance of this class.
181
- #==Raises
182
- #[IOError] The wrapped IO doesn’t support rewinding.
183
- # Do not use the StreamReader instance anymore
184
- # after receiving this exception.
185
- #==Remarks
186
- #I don’t really like this method, it uses several dirty
187
- #tricks to circumvent both io-like’s and liblzma’s control
188
- #mechanisms. I only implemented this because the
189
- #<tt>archive-tar-minitar</tt> gem calls this method when
190
- #unpacking a TAR archive from a stream.
191
- def rewind
192
- # HACK: Wipe all data from io-like’s internal read buffer.
193
- # This heavily relies on io-like’s internal structure.
194
- # Be always sure to test this when a new version of
195
- # io-like is released!
196
- __io_like__internal_read_buffer.clear
206
+ # Note: Querying the underlying IO as early as possible allows
207
+ # Ruby's own IO exceptions to bubble up.
208
+ if length
209
+ return nil if eof? # In line with IO#read
210
+ outbuf.force_encoding(Encoding::BINARY) # As per IO#read docs
197
211
 
198
- # Forcibly close the XZ stream (internally frees it!)
199
- res = XZ::LibLZMA.lzma_end(@lzma_stream.pointer)
200
- XZ::LZMAError.raise_if_necessary(res)
212
+ # The user's request is in decompressed bytes, so it doesn't matter
213
+ # how much is actually read from the compressed file.
214
+ if @delegate_io.eof?
215
+ data = ""
216
+ action = XZ::LibLZMA::LZMA_FINISH
217
+ else
218
+ data = @delegate_io.read(XZ::CHUNK_SIZE)
219
+ action = @delegate_io.eof? ? XZ::LibLZMA::LZMA_FINISH : XZ::LibLZMA::LZMA_RUN
220
+ end
201
221
 
202
- # Rewind the wrapped IO
203
- begin
204
- @delegate_io.rewind
205
- rescue => e
206
- raise(IOError, "Delegate IO failed to rewind! Original message: #{e.message}")
207
- end
222
+ lzma_code(data, action) { |decompressed| @readbuf << decompressed }
208
223
 
209
- # Reinitialize everything. Note this doesn’t affect @file as it
210
- # is already set and stays so (we don’t pass a filename here,
211
- # but rather an IO)
212
- initialize(@delegate_io, @memory_limit, @flags)
213
- end
224
+ # If the requested amount has been read, return it.
225
+ # Also return if EOF has been reached. Note that
226
+ # String#slice! will clear the string to an empty one
227
+ # if `length' is greater than the string length.
228
+ # If EOF is not yet reached, try reading and decompressing
229
+ # more data.
230
+ if @readbuf.bytesize >= length || @delegate_io.eof?
231
+ result = @readbuf.slice!(0, length)
232
+ @pos += result.bytesize
233
+ return outbuf.replace(result)
234
+ else
235
+ return read(length, outbuf)
236
+ end
237
+ else
238
+ # Read the entire file and decompress it into memory, returning it.
239
+ while chunk = @delegate_io.read(XZ::CHUNK_SIZE)
240
+ action = @delegate_io.eof? ? XZ::LibLZMA::LZMA_FINISH : XZ::LibLZMA::LZMA_RUN
241
+ lzma_code(chunk, action) { |decompressed| @readbuf << decompressed }
242
+ end
214
243
 
215
- #NO, you CANNOT seek in this object!!
216
- #io-like’s default behaviour is to raise Errno::ESPIPE
217
- #when calling a non-defined seek, which is not what some
218
- #libraries such as RubyGem’s TarReader expect (they expect
219
- #a NoMethodError/NameError instead).
220
- undef seek
244
+ @pos += @readbuf.bytesize
221
245
 
222
- private
246
+ # Apply encoding conversion.
247
+ # First, tag the read data with the external encoding.
248
+ @readbuf.force_encoding(@external_encoding)
223
249
 
224
- #Called by io-like’s read methods such as #read. Does the heavy work
225
- #of feeding liblzma the compressed data and reading the returned
226
- #uncompressed data.
227
- def unbuffered_read(length)
228
- raise(EOFError, "Input data completely processed!") if @__lzma_finished
250
+ # Now, transcode it to the internal encoding if that was requested.
251
+ # Otherwise return it with the external encoding as-is.
252
+ if @internal_encoding
253
+ @readbuf.encode!(@internal_encoding, **@transcode_options)
254
+ outbuf.force_encoding(@internal_encoding)
255
+ else
256
+ outbuf.force_encoding(@external_encoding)
257
+ end
229
258
 
230
- output_buffer_p = FFI::MemoryPointer.new(length) # User guarantees that this fits into RAM
259
+ outbuf.replace(@readbuf)
260
+ @readbuf.clear
261
+ @readbuf.force_encoding(Encoding::BINARY) # Back to binary mode for further reading
231
262
 
232
- @lzma_stream[:next_out] = output_buffer_p
233
- @lzma_stream[:avail_out] = output_buffer_p.size
263
+ return outbuf
264
+ end
265
+ end
234
266
 
235
- loop do
236
- # DON’T overwrite any not yet consumed input from any previous
237
- # run! Instead, wait until the last input data is entirely
238
- # consumed, then provide new data.
239
- # TODO: Theoretically, one could move the remaining data to the
240
- # beginning of the pointer and fill the rest with new data,
241
- # being a tiny bit more performant.
242
- if @lzma_stream[:avail_in].zero?
243
- compressed_data = @delegate_io.read(@input_buffer_p.size) || "" # nil at EOS → ""
244
- @input_buffer_p.write_string(compressed_data)
245
- @lzma_stream[:next_in] = @input_buffer_p
246
- @lzma_stream[:avail_in] = binary_size(compressed_data)
267
+ # Abort the current decompression process and reset everything
268
+ # to the start so that reading from this reader will start over
269
+ # from the beginning of the compressed data.
270
+ #
271
+ # The delegate IO has to support the #rewind method. Otherwise
272
+ # like IO#rewind.
273
+ def rewind
274
+ super
247
275
 
248
- # Now check if we’re at the last bytes of data and set accordingly the
249
- # LZMA-action to carry out (for any subsequent runs until
250
- # all input data has been consumed and the above condition
251
- # is triggered again).
252
- #
253
- # The @__lzma_action variable is only used in this method
254
- # and is _not_ supposed to be accessed from any other method.
255
- if compressed_data.empty?
256
- @__lzma_action = XZ::LibLZMA::LZMA_ACTION[:lzma_finish]
257
- else
258
- @__lzma_action = XZ::LibLZMA::LZMA_ACTION[:lzma_run]
259
- end
260
- end
276
+ @readbuf.clear
277
+ res = XZ::LibLZMA.lzma_stream_decoder(@lzma_stream.to_ptr,
278
+ @memory_limit,
279
+ @allflags)
280
+ XZ::LZMAError.raise_if_necessary(res)
261
281
 
262
- res = XZ::LibLZMA.lzma_code(@lzma_stream.pointer, @__lzma_action)
282
+ 0 # Mimic IO#rewind's return value
283
+ end
263
284
 
264
- # liblzma signals LZMA_BUF_ERROR when the output buffer is
265
- # completely filled, which means we can return now.
266
- # When it signals LZMA_STREAM_END, the buffer won’t be filled
267
- # completely anymore as the whole input data has been consumed.
268
- if res == XZ::LibLZMA::LZMA_RET[:lzma_buf_error]
269
- # @lzma_stream[:avail_out] holds the number of free bytes _behind_
270
- # the produced output!
271
- return output_buffer_p.read_string(output_buffer_p.size - @lzma_stream[:avail_out])
272
- elsif res == XZ::LibLZMA::LZMA_RET[:lzma_stream_end]
273
- # @__lzma_finished is not supposed to be used outside this method!
274
- @__lzma_finished = true
275
- return output_buffer_p.read_string(output_buffer_p.size - @lzma_stream[:avail_out])
276
- else
277
- XZ::LZMAError.raise_if_necessary(res)
278
- end
279
- end #loop
285
+ # Like IO#ungetbyte.
286
+ def ungetbyte(obj)
287
+ if obj.respond_to? :chr
288
+ @readbuf.prepend(obj.chr)
289
+ else
290
+ @readbuf.prepend(obj.to_s)
291
+ end
292
+ end
293
+
294
+ # Like IO#ungetc.
295
+ def ungetc(str)
296
+ @readbuf.prepend(str)
297
+ end
298
+
299
+ # Returns true if:
300
+ #
301
+ # 1. The underlying IO has reached EOF, and
302
+ # 2. liblzma has returned everything it could make out of that.
303
+ def eof?
304
+ @delegate_io.eof? && @readbuf.empty?
305
+ end
280
306
 
281
- rescue XZ::LZMAError => e
282
- raise(SystemCallError, e.message)
307
+ # Human-readable description
308
+ def inspect
309
+ "<#{self.class} pos=#{@pos} bufsize=#{@readbuf.bytesize} finished=#{@finished} closed=#{closed?} io=#{@delegate_io.inspect}>"
283
310
  end
284
311
 
285
312
  end