ruby-xz 0.2.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/AUTHORS +7 -0
- data/HISTORY.rdoc +84 -7
- data/LICENSE +21 -0
- data/README.md +122 -0
- data/lib/xz/fiddle_helper.rb +91 -0
- data/lib/xz/lib_lzma.rb +134 -110
- data/lib/xz/stream.rb +431 -32
- data/lib/xz/stream_reader.rb +251 -224
- data/lib/xz/stream_writer.rb +208 -158
- data/lib/xz/version.rb +33 -0
- data/lib/xz.rb +412 -232
- metadata +49 -57
- data/COPYING +0 -26
- data/README.rdoc +0 -89
data/lib/xz/stream_reader.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
#
|
2
|
+
#--
|
4
3
|
# Basic liblzma-bindings for Ruby.
|
5
4
|
#
|
6
|
-
# Copyright ©
|
5
|
+
# Copyright © 2011-2018 Marvin Gülker et al.
|
6
|
+
#
|
7
|
+
# See AUTHORS for the full list of contributors.
|
7
8
|
#
|
8
9
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
9
10
|
# copy of this software and associated documentation files (the ‘Software’),
|
@@ -22,264 +23,290 @@
|
|
22
23
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
23
24
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
24
25
|
# THE SOFTWARE.
|
26
|
+
#++
|
25
27
|
|
26
|
-
#An IO-like reader class for XZ-compressed data, allowing you to
|
27
|
-
#access XZ-compressed data as if it was a normal IO object, but
|
28
|
-
#please note you can’t seek in the data--this doesn’t make much
|
29
|
-
#sense anyway. Where would you want to seek? The plain or the XZ
|
30
|
-
#data?
|
31
|
-
#
|
32
|
-
#A StreamReader object actually wraps another IO object it reads
|
33
|
-
#the compressed data from; you can either pass this IO object directly
|
34
|
-
#to the ::new method, effectively allowing you to pass any IO-like thing
|
35
|
-
#you can imagine (just ensure it is readable), or you can pass a path
|
36
|
-
#to a filename to ::new, in which case StreamReader takes care of both
|
37
|
-
#opening and closing the file correctly. You can even take it one step
|
38
|
-
#further and use the block form of ::new which will automatically call
|
39
|
-
#the #close method for you after the block finished. However, if you pass
|
40
|
-
#an IO, remember you have to close:
|
41
|
-
#
|
42
|
-
#1. The StreamReader instance.
|
43
|
-
#2. The IO object you passed to ::new.
|
44
|
-
#
|
45
|
-
#Do it <b>in exactly that order</b>, otherwise you may lose data.
|
28
|
+
# An IO-like reader class for XZ-compressed data, allowing you to
|
29
|
+
# access XZ-compressed data as if it was a normal IO object, but
|
30
|
+
# please note you can’t seek in the data--this doesn’t make much
|
31
|
+
# sense anyway. Where would you want to seek? The plain or the XZ
|
32
|
+
# data?
|
46
33
|
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
50
|
-
#
|
51
|
-
|
52
|
-
#
|
53
|
-
|
54
|
-
#
|
55
|
-
#
|
56
|
-
# require "xz"
|
57
|
-
# require "archive/tar/minitar"
|
58
|
-
#
|
59
|
-
# XZ::StreamReader.open("foo.tar.xz") do |txz|
|
60
|
-
# # This automatically closes txz
|
61
|
-
# Archive::Tar::Minitar.unpack(txz, "foo")
|
62
|
-
# end
|
34
|
+
# A StreamReader object actually wraps another IO object it reads
|
35
|
+
# the compressed data from; you can either pass this IO object directly
|
36
|
+
# to the ::new method, effectively allowing you to pass any IO-like thing
|
37
|
+
# you can imagine (just ensure it is readable), or you can pass a path
|
38
|
+
# to a file to ::open, in which case StreamReader will open the path
|
39
|
+
# using Ruby's File class internally. If you use ::open's block form,
|
40
|
+
# the method will take care of properly closing both the liblzma
|
41
|
+
# stream and the File instance.
|
63
42
|
class XZ::StreamReader < XZ::Stream
|
64
43
|
|
65
|
-
#The memory limit
|
44
|
+
# The memory limit configured for this lzma decoder.
|
66
45
|
attr_reader :memory_limit
|
67
|
-
#The flags you set for this reader (in ::new).
|
68
|
-
attr_reader :flags
|
69
46
|
|
70
|
-
#call-seq:
|
71
|
-
#
|
72
|
-
#
|
47
|
+
# call-seq:
|
48
|
+
# open(filename [, kw]) → stream_reader
|
49
|
+
# open(filename [, kw]){|sr| ...} → stream_reader
|
73
50
|
#
|
74
|
-
#
|
75
|
-
#
|
76
|
-
#
|
77
|
-
#any internal buffers in order to be able to read all decompressed
|
78
|
-
#data.
|
79
|
-
#==Parameters
|
80
|
-
#[delegate] An IO object to read the data from, or a path
|
81
|
-
# to a file to open. If you’re in an urgent need to
|
82
|
-
# pass a plain string, use StringIO from Ruby’s
|
83
|
-
# standard library. If this is an IO, it must be
|
84
|
-
# opened for reading.
|
85
|
-
#The other parameters are identical to what the XZ::decompress_stream
|
86
|
-
#method expects.
|
87
|
-
#==Return value
|
88
|
-
#The newly created instance.
|
89
|
-
#==Example
|
90
|
-
# # Wrap it around a file
|
91
|
-
# f = File.open("foo.xz")
|
92
|
-
# r = XZ::StreamReader.new(f)
|
51
|
+
# Open the given file and wrap a new instance around it with ::new.
|
52
|
+
# If you use the block form, both the internally created File instance
|
53
|
+
# and the liblzma stream will be closed automatically for you.
|
93
54
|
#
|
94
|
-
#
|
95
|
-
#
|
96
|
-
#
|
97
|
-
#
|
55
|
+
# === Parameters
|
56
|
+
# [filename]
|
57
|
+
# Path to the file to open.
|
58
|
+
# [sr (block argument)]
|
59
|
+
# The created StreamReader instance.
|
98
60
|
#
|
99
|
-
#
|
100
|
-
#
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
61
|
+
# See ::new for a description of the keyword parameters.
|
62
|
+
#
|
63
|
+
# === Return value
|
64
|
+
# The newly created instance.
|
65
|
+
#
|
66
|
+
# === Remarks
|
67
|
+
# Starting with version 1.0.0, the block form also returns the newly
|
68
|
+
# created instance rather than the block's return value. This is
|
69
|
+
# in line with Ruby's own GzipReader.open API.
|
70
|
+
#
|
71
|
+
# === Example
|
72
|
+
# # Normal usage
|
73
|
+
# XZ::StreamReader.open("myfile.txt.xz") do |xz|
|
74
|
+
# puts xz.read #=> I love Ruby
|
75
|
+
# end
|
76
|
+
#
|
77
|
+
# # If you really need the File instance created internally:
|
78
|
+
# file = nil
|
79
|
+
# XZ::StreamReader.open("myfile.txt.xz") do |xz|
|
80
|
+
# puts xz.read #=> I love Ruby
|
81
|
+
# file = xz.finish # prevents closing
|
82
|
+
# end
|
83
|
+
# file.close # Now close it manually
|
84
|
+
#
|
85
|
+
# # Or just don't use the block form:
|
86
|
+
# xz = XZ::StreamReader.open("myfile.txt.xz")
|
87
|
+
# puts xz.read #=> I love Ruby
|
88
|
+
# file = xz.finish
|
89
|
+
# file.close # Don't forget to close it manually (or use xz.close instead of xz.finish above).
|
90
|
+
def self.open(filename, **args)
|
91
|
+
file = File.open(filename, "rb")
|
92
|
+
reader = new(file, **args)
|
127
93
|
|
128
94
|
if block_given?
|
129
95
|
begin
|
130
|
-
yield(
|
96
|
+
yield(reader)
|
131
97
|
ensure
|
132
|
-
|
98
|
+
# Close both delegate IO and reader.
|
99
|
+
reader.close unless reader.finished?
|
133
100
|
end
|
134
101
|
end
|
102
|
+
|
103
|
+
reader
|
135
104
|
end
|
136
|
-
self.class.send(:alias_method, :open, :new)
|
137
105
|
|
138
|
-
#
|
139
|
-
#
|
140
|
-
|
141
|
-
#
|
142
|
-
|
143
|
-
#
|
144
|
-
|
145
|
-
#
|
146
|
-
#
|
147
|
-
|
148
|
-
|
106
|
+
# Creates a new instance that is wrapped around the given IO object.
|
107
|
+
#
|
108
|
+
# === Parameters
|
109
|
+
# ==== Positional parameters
|
110
|
+
# [delegate_io]
|
111
|
+
# The underlying IO object to read the compressed data from.
|
112
|
+
# This IO object has to have been opened in binary mode,
|
113
|
+
# otherwise you are likely to receive exceptions indicating
|
114
|
+
# that the compressed data is corrupt.
|
115
|
+
#
|
116
|
+
# ==== Keyword arguments
|
117
|
+
# [memory_limit (+UINT64_MAX+)]
|
118
|
+
# If not XZ::LibLZMA::UINT64_MAX, makes liblzma
|
119
|
+
# use no more memory than +memory_limit+ bytes.
|
120
|
+
# [flags (<tt>[:tell_unsupported_check]</tt>)]
|
121
|
+
# Additional flags passed to liblzma (an array).
|
122
|
+
# Possible flags are:
|
123
|
+
#
|
124
|
+
# [:tell_no_check]
|
125
|
+
# Spit out a warning if the archive has no
|
126
|
+
# integrity checksum.
|
127
|
+
# [:tell_unsupported_check]
|
128
|
+
# Spit out a warning if the archive
|
129
|
+
# has an unsupported checksum type.
|
130
|
+
# [:concatenated]
|
131
|
+
# Decompress concatenated archives.
|
132
|
+
# [external_encoding (Encoding.default_external)]
|
133
|
+
# Assume the decompressed data inside the XZ is encoded in
|
134
|
+
# this encoding. Defaults to Encoding.default_external,
|
135
|
+
# which in turn defaults to the environment.
|
136
|
+
# [internal_encoding (Encoding.default_internal)]
|
137
|
+
# Request that the data found in the XZ file (which is assumed
|
138
|
+
# to be in the encoding specified by +external_encoding+)
|
139
|
+
# be transcoded into this encoding. Defaults to Encoding.default_internal,
|
140
|
+
# which defaults to nil, which means to not transcode anything.
|
141
|
+
#
|
142
|
+
# === Return value
|
143
|
+
# The newly created instance.
|
144
|
+
#
|
145
|
+
# === Remarks
|
146
|
+
# The strings returned from the reader will be in the encoding specified
|
147
|
+
# by the +internal_encoding+ parameter. If that parameter is nil (default),
|
148
|
+
# then they will be in the encoding specified by +external_encoding+.
|
149
|
+
#
|
150
|
+
# This method used to accept a block in earlier versions. Since version 1.0.0,
|
151
|
+
# this behaviour has been removed to synchronise the API with Ruby's own
|
152
|
+
# GzipReader.open.
|
153
|
+
#
|
154
|
+
# This method doesn't close the underlying IO or the liblzma stream.
|
155
|
+
# You need to call #finish or #close manually; see ::open for a method
|
156
|
+
# that takes a block to automate this.
|
157
|
+
#
|
158
|
+
# === Example
|
159
|
+
# file = File.open("compressed.txt.xz", "rb") # Note binary mode
|
160
|
+
# xz = XZ::StreamReader.new(file)
|
161
|
+
# puts xz.read #=> I love Ruby
|
162
|
+
# xz.close # closes both `xz' and `file'
|
163
|
+
#
|
164
|
+
# file = File.open("compressed.txt.xz", "rb") # Note binary mode
|
165
|
+
# xz = XZ::StreamReader.new(file)
|
166
|
+
# puts xz.read #=> I love Ruby
|
167
|
+
# xz.finish # closes only `xz'
|
168
|
+
# file.close # Now close `file' manually
|
169
|
+
def initialize(delegate_io, memory_limit: XZ::LibLZMA::UINT64_MAX, flags: [:tell_unsupported_check], external_encoding: nil, internal_encoding: nil)
|
170
|
+
super(delegate_io)
|
171
|
+
raise(ArgumentError, "When specifying the internal encoding, the external encoding must also be specified") if internal_encoding && !external_encoding
|
172
|
+
raise(ArgumentError, "Memory limit out of range") unless memory_limit > 0 && memory_limit <= XZ::LibLZMA::UINT64_MAX
|
149
173
|
|
150
|
-
|
151
|
-
|
152
|
-
|
174
|
+
@memory_limit = memory_limit
|
175
|
+
@readbuf = String.new
|
176
|
+
@readbuf.force_encoding(Encoding::BINARY)
|
153
177
|
|
154
|
-
|
155
|
-
|
178
|
+
if external_encoding
|
179
|
+
encargs = []
|
180
|
+
encargs << external_encoding
|
181
|
+
encargs << internal_encoding if internal_encoding
|
182
|
+
set_encoding(*encargs)
|
183
|
+
end
|
184
|
+
|
185
|
+
@allflags = flags.reduce(0) do |val, flag|
|
186
|
+
flag = XZ::LibLZMA::LZMA_DECODE_FLAGS[flag] || raise(ArgumentError, "Unknown flag #{flag}")
|
187
|
+
val | flag
|
188
|
+
end
|
156
189
|
|
157
|
-
|
158
|
-
|
190
|
+
res = XZ::LibLZMA.lzma_stream_decoder(@lzma_stream.to_ptr,
|
191
|
+
@memory_limit,
|
192
|
+
@allflags)
|
193
|
+
XZ::LZMAError.raise_if_necessary(res)
|
159
194
|
end
|
160
195
|
|
161
|
-
#
|
162
|
-
#
|
163
|
-
#
|
196
|
+
# Mostly like IO#read. The +length+ parameter refers to the amount
|
197
|
+
# of decompressed bytes to read, not the amount of bytes to read
|
198
|
+
# from the compressed data. That is, if you request a read of 50
|
199
|
+
# bytes, you will receive a string with a maximum length of 50
|
200
|
+
# bytes, regardless of how many bytes this was in compressed form.
|
164
201
|
#
|
165
|
-
#
|
166
|
-
def
|
167
|
-
|
168
|
-
end
|
169
|
-
alias tell pos
|
202
|
+
# Return values are as per IO#read.
|
203
|
+
def read(length = nil, outbuf = String.new)
|
204
|
+
return "".force_encoding(Encoding::BINARY) if length == 0 # Shortcut; retval as per IO#read.
|
170
205
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
#work; if it doesn’t, this method throws an IOError.
|
177
|
-
#After the exception was thrown, the StreamReader instance
|
178
|
-
#is in an unusable state. You cannot continue using it
|
179
|
-
#(don’t call #close on it either); close the wrapped IO
|
180
|
-
#stream and create another instance of this class.
|
181
|
-
#==Raises
|
182
|
-
#[IOError] The wrapped IO doesn’t support rewinding.
|
183
|
-
# Do not use the StreamReader instance anymore
|
184
|
-
# after receiving this exception.
|
185
|
-
#==Remarks
|
186
|
-
#I don’t really like this method, it uses several dirty
|
187
|
-
#tricks to circumvent both io-like’s and liblzma’s control
|
188
|
-
#mechanisms. I only implemented this because the
|
189
|
-
#<tt>archive-tar-minitar</tt> gem calls this method when
|
190
|
-
#unpacking a TAR archive from a stream.
|
191
|
-
def rewind
|
192
|
-
# HACK: Wipe all data from io-like’s internal read buffer.
|
193
|
-
# This heavily relies on io-like’s internal structure.
|
194
|
-
# Be always sure to test this when a new version of
|
195
|
-
# io-like is released!
|
196
|
-
__io_like__internal_read_buffer.clear
|
206
|
+
# Note: Querying the underlying IO as early as possible allows to
|
207
|
+
# have Ruby's own IO exceptions to bubble up.
|
208
|
+
if length
|
209
|
+
return nil if eof? # In line with IO#read
|
210
|
+
outbuf.force_encoding(Encoding::BINARY) # As per IO#read docs
|
197
211
|
|
198
|
-
|
199
|
-
|
200
|
-
|
212
|
+
# The user's request is in decompressed bytes, so it doesn't matter
|
213
|
+
# how much is actually read from the compressed file.
|
214
|
+
if @delegate_io.eof?
|
215
|
+
data = ""
|
216
|
+
action = XZ::LibLZMA::LZMA_FINISH
|
217
|
+
else
|
218
|
+
data = @delegate_io.read(XZ::CHUNK_SIZE)
|
219
|
+
action = @delegate_io.eof? ? XZ::LibLZMA::LZMA_FINISH : XZ::LibLZMA::LZMA_RUN
|
220
|
+
end
|
201
221
|
|
202
|
-
|
203
|
-
begin
|
204
|
-
@delegate_io.rewind
|
205
|
-
rescue => e
|
206
|
-
raise(IOError, "Delegate IO failed to rewind! Original message: #{e.message}")
|
207
|
-
end
|
222
|
+
lzma_code(data, action) { |decompressed| @readbuf << decompressed }
|
208
223
|
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
224
|
+
# If the requested amount has been read, return it.
|
225
|
+
# Also return if EOF has been reached. Note that
|
226
|
+
# String#slice! will clear the string to an empty one
|
227
|
+
# if `length' is greater than the string length.
|
228
|
+
# If EOF is not yet reached, try reading and decompressing
|
229
|
+
# more data.
|
230
|
+
if @readbuf.bytesize >= length || @delegate_io.eof?
|
231
|
+
result = @readbuf.slice!(0, length)
|
232
|
+
@pos += result.bytesize
|
233
|
+
return outbuf.replace(result)
|
234
|
+
else
|
235
|
+
return read(length, outbuf)
|
236
|
+
end
|
237
|
+
else
|
238
|
+
# Read the entire file and decompress it into memory, returning it.
|
239
|
+
while chunk = @delegate_io.read(XZ::CHUNK_SIZE)
|
240
|
+
action = @delegate_io.eof? ? XZ::LibLZMA::LZMA_FINISH : XZ::LibLZMA::LZMA_RUN
|
241
|
+
lzma_code(chunk, action) { |decompressed| @readbuf << decompressed }
|
242
|
+
end
|
214
243
|
|
215
|
-
|
216
|
-
#io-like’s default behaviour is to raise Errno::ESPIPE
|
217
|
-
#when calling a non-defined seek, which is not what some
|
218
|
-
#libraries such as RubyGem’s TarReader expect (they expect
|
219
|
-
#a NoMethodError/NameError instead).
|
220
|
-
undef seek
|
244
|
+
@pos += @readbuf.bytesize
|
221
245
|
|
222
|
-
|
246
|
+
# Apply encoding conversion.
|
247
|
+
# First, tag the read data with the external encoding.
|
248
|
+
@readbuf.force_encoding(@external_encoding)
|
223
249
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
250
|
+
# Now, transcode it to the internal encoding if that was requested.
|
251
|
+
# Otherwise return it with the external encoding as-is.
|
252
|
+
if @internal_encoding
|
253
|
+
@readbuf.encode!(@internal_encoding, **@transcode_options)
|
254
|
+
outbuf.force_encoding(@internal_encoding)
|
255
|
+
else
|
256
|
+
outbuf.force_encoding(@external_encoding)
|
257
|
+
end
|
229
258
|
|
230
|
-
|
259
|
+
outbuf.replace(@readbuf)
|
260
|
+
@readbuf.clear
|
261
|
+
@readbuf.force_encoding(Encoding::BINARY) # Back to binary mode for further reading
|
231
262
|
|
232
|
-
|
233
|
-
|
263
|
+
return outbuf
|
264
|
+
end
|
265
|
+
end
|
234
266
|
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
compressed_data = @delegate_io.read(@input_buffer_p.size) || "" # nil at EOS → ""
|
244
|
-
@input_buffer_p.write_string(compressed_data)
|
245
|
-
@lzma_stream[:next_in] = @input_buffer_p
|
246
|
-
@lzma_stream[:avail_in] = binary_size(compressed_data)
|
267
|
+
# Abort the current decompression process and reset everything
|
268
|
+
# to the start so that reading from this reader will start over
|
269
|
+
# from the beginning of the compressed data.
|
270
|
+
#
|
271
|
+
# The delegate IO has to support the #rewind method. Otherwise
|
272
|
+
# behaves like IO#rewind.
|
273
|
+
def rewind
|
274
|
+
super
|
247
275
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
# The @__lzma_action variable is only used in this method
|
254
|
-
# and is _not_ supposed to be accessed from any other method.
|
255
|
-
if compressed_data.empty?
|
256
|
-
@__lzma_action = XZ::LibLZMA::LZMA_ACTION[:lzma_finish]
|
257
|
-
else
|
258
|
-
@__lzma_action = XZ::LibLZMA::LZMA_ACTION[:lzma_run]
|
259
|
-
end
|
260
|
-
end
|
276
|
+
@readbuf.clear
|
277
|
+
res = XZ::LibLZMA.lzma_stream_decoder(@lzma_stream.to_ptr,
|
278
|
+
@memory_limit,
|
279
|
+
@allflags)
|
280
|
+
XZ::LZMAError.raise_if_necessary(res)
|
261
281
|
|
262
|
-
|
282
|
+
0 # Mimic IO#rewind's return value
|
283
|
+
end
|
263
284
|
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
285
|
+
# Like IO#ungetbyte.
|
286
|
+
def ungetbyte(obj)
|
287
|
+
if obj.respond_to? :chr
|
288
|
+
@readbuf.prepend(obj.chr)
|
289
|
+
else
|
290
|
+
@readbuf.prepend(obj.to_s)
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
# Like IO#ungetc.
|
295
|
+
def ungetc(str)
|
296
|
+
@readbuf.prepend(str)
|
297
|
+
end
|
298
|
+
|
299
|
+
# Returns true if:
|
300
|
+
#
|
301
|
+
# 1. The underlying IO has reached EOF, and
|
302
|
+
# 2. liblzma has returned everything it could make out of that.
|
303
|
+
def eof?
|
304
|
+
@delegate_io.eof? && @readbuf.empty?
|
305
|
+
end
|
280
306
|
|
281
|
-
|
282
|
-
|
307
|
+
# Human-readable description
|
308
|
+
def inspect
|
309
|
+
"<#{self.class} pos=#{@pos} bufsize=#{@readbuf.bytesize} finished=#{@finished} closed=#{closed?} io=#{@delegate_io.inspect}>"
|
283
310
|
end
|
284
311
|
|
285
312
|
end
|