ruby-xz 0.2.2 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,10 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #--
3
- # (The MIT license)
4
- #
5
3
  # Basic liblzma-bindings for Ruby.
6
4
  #
7
- # Copyright © 2012, 2015 Marvin Gülker
5
+ # Copyright © 2011-2018 Marvin Gülker et al.
6
+ #
7
+ # See AUTHORS for the full list of contributors.
8
8
  #
9
9
  # Permission is hereby granted, free of charge, to any person obtaining a
10
10
  # copy of this software and associated documentation files (the ‘Software’),
@@ -33,10 +33,10 @@
33
33
  # XZ-compressed data to. Here’s an ASCII art image to demonstrate the way
34
34
  # data flows when using StreamWriter to write to a compressed file:
35
35
  #
36
- # +----------------+ +------------+
37
- # YOUR =>|StreamWriter's |=>|Wrapped IO's|=> ACTUAL
38
- # DATA =>|internal buffers|=>|buffers |=> FILE
39
- # +----------------+ +------------+
36
+ # +-----------------+ +------------+
37
+ # YOUR =>|StreamWriter's |=>|Wrapped IO's|=> ACTUAL
38
+ # DATA =>|(liblzma) buffers|=>|buffers |=> FILE
39
+ # +-----------------+ +------------+
40
40
  #
41
41
  # This graphic also illustrates why it is unlikely to see written data
42
42
  # directly appear in the file on your harddisk; the data is cached at
@@ -46,361 +46,220 @@
46
46
  # instance and then the wrapped IO object (in *exactly* that order,
47
47
  # otherwise data loss and unexpected exceptions may occur!).
48
48
  #
49
- # As it might be tedious to always remember the correct closing order,
50
- # it’s possible to pass a filename to the ::open method. In this case,
51
- # StreamWriter will open the file internally and also takes care
52
- # closing it when you call the #close method.
53
- #
54
- # *WARNING*: The closing behaviour described above is subject to
55
- # change in the next major version. In the future, wrapped IO
56
- # objects are automatically closed always, regardless of whether you
57
- # passed a filename or an IO instance. This is to sync the API with
58
- # Ruby’s own Zlib::GzipWriter. To retain the old behaviour, call
59
- # the #finish method (which is also in sync with the Zlib API).
60
- #
61
- # See the +io-like+ gem’s documentation for the IO-writing methods
62
- # available for this class (although you’re probably familiar with
63
- # them through Ruby’s own IO class ;-)).
64
- #
65
- # == Example
66
- #
67
- # Together with the <tt>archive-tar-minitar</tt> gem, this library
68
- # can be used to create XZ-compressed TAR archives (these commonly
69
- # use a file extension of <tt>.tar.xz</tt> or rarely <tt>.txz</tt>).
49
+ # Calling the #close method closes both the XZ writer and the
50
+ # underlying IO object in the correct order. This is akin to the
51
+ # behaviour exposed by Ruby's own Zlib::GzipWriter class. If you
52
+ # expressly don't want to close the underlying IO instance, you need
53
+ # to manually call StreamWriter#finish and never call
54
+ # StreamWriter#close. Instead, you then close your IO object manually
55
+ # using IO#close once you're done with it.
70
56
  #
71
- # XZ::StreamWriter.open("foo.tar.xz") do |txz|
72
- # # This automatically closes txz
73
- # Archive::Tar::Minitar.pack("foo", txz)
74
- # end
57
+ # *NOTE*: Using #finish inside the +open+ method's block allows
58
+ # you to continue using that writer's File instance as it is
59
+ # returned by #finish.
75
60
  class XZ::StreamWriter < XZ::Stream
76
61
 
62
+ # Compression level used for this writer (set on instantiation).
63
+ attr_reader :level
64
+ # Checksum algorithm in use.
65
+ attr_reader :check
66
+
77
67
  # call-seq:
78
- # new(delegate, compression_level = 6, opts = {}) → writer
79
- # new(delegate, compression_level = 6, opts = {}){|writer| } → obj
68
+ # open(filename [, level: 6 [, other keyword options ]]) → stream_writer
69
+ # open(filename [, level: 6 [, other keyword options ]]){|sw| ...} → stream_writer
80
70
  #
81
- # Creates a new StreamWriter instance. The block form automatically
82
- # calls the #close method when the block has finished executing.
71
+ # Creates a new instance for writing to a compressed file. The File
72
+ # instance is opened internally and then wrapped via ::new. The
73
+ # block form automatically closes both the liblzma stream and the
74
+ # internal File instance in the correct order. The non-block form
75
+ # does neither, leaving it to you to call #finish or #close later.
83
76
  #
84
77
  # === Parameters
85
- # [delegate]
86
- # An IO object to write the data to
87
- #
88
- # [compression_level (6)]
89
- # Compression strength. Higher values indicate a smaller result,
90
- # but longer compression time. Maximum is 9.
91
- #
92
- # [opts]
93
- # Options hash. Possible values are (defaults indicated in
94
- # parantheses):
95
- #
96
- # [:check (:crc64)]
97
- # The checksum algorithm to use for verifying
98
- # the data inside the archive. Possible values are:
99
- # * :none
100
- # * :crc32
101
- # * :crc64
102
- # * :sha256
103
- #
104
- # [:extreme (false)]
105
- # Tries to get the last bit out of the compression.
106
- # This may succeed, but you can end up with *very*
107
- # long computation times.
78
+ # [filename]
79
+ # The file to open.
80
+ # [sw (block argument)]
81
+ # The created StreamWriter instance.
108
82
  #
109
- # [writer]
110
- # Block argument. self of the new instance.
83
+ # See ::new for the other parameters.
111
84
  #
112
85
  # === Return value
86
+ # Returns the newly created instance.
113
87
  #
114
- # The block form returns the block’s last expression, the nonblock
115
- # form returns the newly created instance.
116
- #
117
- # === Deprecations
118
- #
119
- # The old API for this method as it was documented in version 0.2.1
120
- # still works, but is deprecated. Please change to the new API as
121
- # soon as possible.
122
- #
123
- # *WARNING*: The closing behaviour of the block form is subject to
124
- # upcoming change. In the next major release the wrapped IO *will*
125
- # be automatically closed, unless you call #finish to prevent that.
88
+ # === Remarks
89
+ # Starting with version 1.0.0, the block form also returns the newly
90
+ # created instance rather than the block's return value. This is
91
+ # in line with Ruby's own GzipWriter.open API.
126
92
  #
127
93
  # === Example
128
- #
129
- # # Wrap it around a file
130
- # f = File.open("data.xz")
131
- # w = XZ::StreamWriter.new(f)
132
- #
133
- # # Use SHA256 as the checksum and use a higher compression level
134
- # # than the default (6)
135
- # f = File.open("data.xz")
136
- # w = XZ::StreamWriter.new(f, 8, :check => :sha256)
137
- #
138
- # # Instruct liblzma to use ultra-really-high compression
139
- # # (may take eternity)
140
- # f = File.open("data.xz")
141
- # w = XZ::StreamWriter.new(f, 9, :extreme => true)
142
- def initialize(delegate, compression_level = 6, *args, &block)
143
- if delegate.respond_to?(:to_io)
144
- # Correct use with IO
145
- super(delegate.to_io)
146
- @autoclose = false
147
- else
148
- # Deprecated use of filename
149
- XZ.deprecate "Calling XZ::StreamWriter.new with a filename is deprecated, use XZ::StreamWriter.open instead"
150
-
151
- @autoclose = true
152
- super(File.open(delegate, "wb"))
153
- end
154
-
155
- # Flag for #finish method
156
- @finish = false
157
-
158
- opts = {}
159
- if args[0].kind_of?(Hash) # New API
160
- opts = args[0]
161
- opts[:check] ||= :crc64
162
- opts[:extreme] ||= false
163
- else # Old API
164
- # no arguments may also happen in new API
165
- unless args.empty?
166
- XZ.deprecate "Calling XZ::StreamWriter withm ore than 2 explicit arguments is deprecated, use options hash instead."
167
- end
168
-
169
- opts[:check] = args[0] || :crc64
170
- opts[:extreme] = args[1] || false
171
- end
172
-
173
- # TODO: Check argument validity...
174
-
175
- # Initialize the internal LZMA stream for encoding
176
- res = XZ::LibLZMA.lzma_easy_encoder(@lzma_stream.pointer,
177
- compression_level | (opts[:extreme] ? XZ::LibLZMA::LZMA_PRESET_EXTREME : 0),
178
- XZ::LibLZMA::LZMA_CHECK[:"lzma_check_#{opts[:check]}"])
179
- XZ::LZMAError.raise_if_necessary(res)
94
+ # # Normal usage
95
+ # XZ::StreamWriter.open("myfile.txt.xz") do |xz|
96
+ # xz.puts "Compress this line"
97
+ # xz.puts "And this line as well"
98
+ # end
99
+ #
100
+ # # If for whatever reason you want to do something else with
101
+ # # the internally opened file:
102
+ # file = nil
103
+ # XZ::StreamWriter.open("myfile.txt.xz") do |xz|
104
+ # xz.puts "Compress this line"
105
+ # xz.puts "And this line as well"
106
+ # file = xz.finish
107
+ # end
108
+ # # At this point, the liblzma stream has been closed, but `file'
109
+ # # now contains the internally created File instance, which is
110
+ # # still open. Don't forget to close it yourself at some point
111
+ # # to flush it.
112
+ # file.close
113
+ #
114
+ # # Or just don't use the block form:
115
+ # xz = StreamWriter.open("myfile.txt.xz")
116
+ # xz.puts "Compress this line"
117
+ # xz.puts "And this line as well"
118
+ # file = xz.finish
119
+ # file.close # Don't forget to close it manually (or use xz.close instead of xz.finish above)
120
+ def self.open(filename, **args)
121
+ file = File.open(filename, "wb")
122
+ writer = new(file, **args)
180
123
 
181
124
  if block_given?
182
125
  begin
183
- yield(self)
126
+ yield(writer)
184
127
  ensure
185
- close unless closed?
128
+ # Close both writer and delegate IO via writer.close
129
+ # unless the writer has manually been finished (usually
130
+ # not closing the delegate IO then).
131
+ writer.close unless writer.finished?
186
132
  end
187
133
  end
134
+
135
+ writer
188
136
  end
189
137
 
190
- # call-seq:
191
- # open(filename, compression_level = 6, opts = {}) → writer
192
- # open(filename, compression_level = 6, opts = {}){|writer| …} → obj
193
- #
194
- # Opens a file from disk and wraps an XZ::StreamWriter instance
195
- # around the resulting file IO object. This is a convenience method
196
- # mostly equivalent to
197
- #
198
- # file = File.open(filename, "wb")
199
- # writer = XZ::StreamWriter.new(file, compression_level, opts)
200
- #
201
- # , except that you don’t have to explicitely close the File
202
- # instance, this is done automatically for you when you call #close.
203
- # Beware the Deprecations section in this regard.
138
+ # Creates a new instance that is wrapped around the given IO instance.
204
139
  #
205
140
  # === Parameters
141
+ # ==== Positional parameters
142
+ # [delegate_io]
143
+ # The IO instance to wrap. It has to be opened in binary mode,
144
+ # otherwise the data it writes to the hard disk will be corrupt.
206
145
  #
207
- # [filename]
208
- # Path to a file on the disk to open. This file should exist and be
209
- # writable, otherwise you may get Errno exceptions.
210
- #
211
- # [opts]
212
- # Options hash. See ::new for a description of the possible
213
- # options.
214
- #
215
- # [writer]
216
- # Block argument. self of the new instance.
146
+ # ==== Keyword arguments
147
+ # [:level (6)]
148
+ # Compression strength. Higher values indicate a
149
+ # smaller result, but longer compression time. Maximum
150
+ # is 9.
151
+ # [:check (:crc64)]
152
+ # The checksum algorithm to use for verifying
153
+ # the data inside the archive. Possible values are:
154
+ # * :none
155
+ # * :crc32
156
+ # * :crc64
157
+ # * :sha256
158
+ # [:extreme (false)]
159
+ # Tries to get the last bit out of the
160
+ # compression. This may succeed, but you can end
161
+ # up with *very* long computation times.
162
+ # [:external_encoding (Encoding.default_external)]
163
+ # Transcode to this encoding when writing. Defaults
164
+ # to Encoding.default_external, which by default is
165
+ # set from the environment.
217
166
  #
218
167
  # === Return value
168
+ # Returns the newly created instance.
219
169
  #
220
- # The block form returns the blocks last expression, the nonblock
221
- # form returns the newly created XZ::StreamWriter instance.
222
- #
223
- # === Deprecations
224
- #
225
- # In the API up to and including version 0.2.1 this method was an
226
- # alias for ::new. This continues to work for now, but using it
227
- # as an alias for ::new is deprecated. The next major version will
228
- # only accept a string as a parameter for this method.
170
+ # === Remarks
171
+ # This method does not close the underlying IO nor does it automatically
172
+ # flush liblzma. You'll need to do that manually using #close or #finish.
173
+ # See ::open for a method that supports a block with auto-closing.
229
174
  #
230
- # *WARNING*: Future versions of ruby-xz will always close the
231
- # wrapped IO, regardless of whether you pass in your own IO or use
232
- # this convenience method, unless you call #finish to prevent that.
175
+ # This method used to accept a block in earlier versions. This
176
+ # behaviour has been removed in version 1.0.0 to synchronise the API
177
+ # with Ruby's own GzipWriter.new.
233
178
  #
234
179
  # === Example
235
- #
236
- # w = XZ::StreamWriter.new("compressed_data.xz")
237
- def self.open(filename, compression_level = 6, *args, &block)
238
- if filename.respond_to?(:to_io)
239
- # Deprecated use of IO
240
- XZ.deprecate "Calling XZ::StreamWriter.open with an IO is deprecated, use XZ::StreamReader.new instead."
241
- new(filename.to_io, compression_level, *args, &block)
242
- else
243
- # Correct use with filename
244
- file = File.open(filename, "wb")
245
-
246
- obj = new(file, compression_level, *args)
247
- obj.instance_variable_set(:@autoclose, true) # Only needed during deprecation phase, see #close
248
-
249
- if block_given?
250
- begin
251
- block.call(obj)
252
- ensure
253
- obj.close unless obj.closed?
254
- end
255
- else
256
- obj
257
- end
258
- end
180
+ # # Normal usage:
181
+ # file = File.open("myfile.txt.xz", "wb") # Note binary mode
182
+ # xz = XZ::StreamWriter.new(file)
183
+ # xz.puts("Compress this line")
184
+ # xz.puts("And this second line")
185
+ # xz.close # Closes both the liblzma stream and `file'
186
+ #
187
+ # # Expressly closing the delegate IO manually:
188
+ # File.open("myfile.txt.xz", "wb") do |file| # Note binary mode
189
+ # xz = XZ::StreamWriter.new(file)
190
+ # xz.puts("Compress this line")
191
+ # xz.puts("And this second line")
192
+ # xz.finish # Flushes liblzma, but keeps `file' open.
193
+ # end # Here, `file' is closed.
194
+ def initialize(delegate_io, level: 6, check: :crc64, extreme: false, external_encoding: nil)
195
+ super(delegate_io)
196
+
197
+ raise(ArgumentError, "Invalid compression level!") unless (0..9).include?(level)
198
+ raise(ArgumentError, "Invalid checksum specified!") unless [:none, :crc32, :crc64, :sha256].include?(check)
199
+
200
+ set_encoding(external_encoding) if external_encoding
201
+
202
+ @check = check
203
+ @level = level
204
+ @level |= LibLZMA::LZMA_PRESET_EXTREME if extreme
205
+
206
+ res = XZ::LibLZMA.lzma_easy_encoder(@lzma_stream.to_ptr,
207
+ @level,
208
+ XZ::LibLZMA.const_get(:"LZMA_CHECK_#{@check.upcase}"))
209
+ XZ::LZMAError.raise_if_necessary(res)
259
210
  end
260
211
 
261
- # Closes this StreamWriter instance and flushes all internal buffers.
262
- # Don’t use it afterwards anymore.
263
- #
264
- # === Return value
265
- #
266
- # The total number of bytes written, i.e. the size of the compressed
267
- # data.
268
- #
269
- # === Example
270
- #
271
- # w.close #=> 424
272
- #
273
- # === Remarks
274
- #
275
- # If you passed an IO object to ::new, this method doesn’t close it,
276
- # you have to do that yourself.
277
- #
278
- # *WARNING*: The next major release will change this behaviour.
279
- # In the future, the wrapped IO object will always be closed.
280
- # Use the #finish method for keeping it open.
281
- def close
282
- super
283
-
284
- #1. Close the current block ("file") (an XZ stream may actually include
285
- # multiple compressed files, which however is not supported by
286
- # this library). For this we have to tell liblzma that
287
- # the next bytes we pass to it are the last bytes (by means of
288
- # the FINISH action). Just that we don’t pass any new input ;-)
289
-
290
- output_buffer_p = FFI::MemoryPointer.new(XZ::CHUNK_SIZE)
212
+ # Mostly like IO#write. Additionally it raises an IOError
213
+ # if #finish has been called previously.
214
+ def write(*args)
215
+ raise(IOError, "Cannot write to a finished liblzma stream") if @finished
291
216
 
292
- # Get any pending data (LZMA_FINISH causes libzlma to flush its
293
- # internal buffers) and write it out to our wrapped IO.
294
- loop do
295
- @lzma_stream[:next_out] = output_buffer_p
296
- @lzma_stream[:avail_out] = output_buffer_p.size
217
+ origpos = @pos
297
218
 
298
- res = XZ::LibLZMA.lzma_code(@lzma_stream.pointer, XZ::LibLZMA::LZMA_ACTION[:lzma_finish])
299
- XZ::LZMAError.raise_if_necessary(res)
300
-
301
- @delegate_io.write(output_buffer_p.read_string(output_buffer_p.size - @lzma_stream[:avail_out]))
302
-
303
- break unless @lzma_stream[:avail_out] == 0
304
- end
305
-
306
- # 2. Close the whole XZ stream.
307
- res = XZ::LibLZMA.lzma_end(@lzma_stream.pointer)
308
- XZ::LZMAError.raise_if_necessary(res)
219
+ args.each do |arg|
220
+ @pos += arg.to_s.bytesize
309
221
 
310
- unless @finish
311
- # New API: Close the wrapped IO
312
- #@delegate_io.close
222
+ # Apply external encoding if requested
223
+ if @external_encoding && @external_encoding != Encoding::BINARY
224
+ arg = arg.to_s.encode(@external_encoding)
225
+ end
313
226
 
314
- # Old API:
315
- # 2b. If we wrapped a file automatically, close it.
316
- if @autoclose
317
- @delegate_io.close
318
- else
319
- XZ.deprecate "XZ::StreamWriter#close will automatically close the wrapped IO in the future. Use #finish to prevent that."
227
+ lzma_code(arg.to_s, XZ::LibLZMA::LZMA_RUN) do |compressed|
228
+ @delegate_io.write(compressed)
320
229
  end
321
230
  end
322
231
 
323
- # 3. Return the number of bytes written in total.
324
- @lzma_stream[:total_out]
232
+ @pos - origpos # Return number of bytes consumed from input
325
233
  end
326
234
 
327
- # If called in the block form of ::new or ::open, prevents the
328
- # wrapped IO from being closed, only the LZMA stream is closed
329
- # then. If called outside the block form of ::new and open, behaves
330
- # like #close, but only closes the underlying LZMA stream. The
331
- # wrapped IO object is kept open.
332
- #
333
- # === Return value
334
- #
335
- # Returns the wrapped IO object. This allows you to wire the File
336
- # instance out of a StreamReader instance that was created with
337
- # ::open.
338
- #
339
- # === Example
340
- #
341
- # # Nonblock form
342
- # f = File.open("foo.xz", "wb")
343
- # w = XZ::StreamReader.new(f)
344
- # # ...
345
- # w.finish
346
- # # f is still open here!
347
- #
348
- # # Block form
349
- # f = XZ::StreamReader.open("foo.xz") do |w|
350
- # # ...
351
- # w.finish
352
- # end
353
- # # f now is an *open* File instance of mode "wb".
235
+ # Like superclass' method, but also ensures liblzma flushes all
236
+ # compressed data to the delegate IO.
354
237
  def finish
355
- # Do not close wrapped IO object in #close
356
- @finish = true
357
- close
358
-
359
- @delegate_io
238
+ lzma_code("", XZ::LibLZMA::LZMA_FINISH) { |compressed| @delegate_io.write(compressed) }
239
+ super
360
240
  end
361
241
 
362
- # call-seq:
363
- # pos() → an_integer
364
- # tell() an_integer
242
+ # Abort the current compression process and reset everything
243
+ # to the start. Writing into this writer will cause existing data
244
+ # on the underlying IO to be overwritten after this method has been
245
+ # called.
365
246
  #
366
- # Total number of input bytes read so far from what you supplied to
367
- # any writer method.
368
- def pos
369
- @lzma_stream[:total_in]
370
- end
371
- alias tell pos
372
-
373
- private
374
-
375
- # Called by io-like’s write methods such as #write. Does the heavy
376
- # work of feeding liblzma the uncompressed data and reading the
377
- # returned compressed data.
378
- def unbuffered_write(data)
379
- output_buffer_p = FFI::MemoryPointer.new(XZ::CHUNK_SIZE)
380
- input_buffer_p = FFI::MemoryPointer.from_string(data) # This adds a terminating NUL byte we don’t want to compress!
381
-
382
- @lzma_stream[:next_in] = input_buffer_p
383
- @lzma_stream[:avail_in] = input_buffer_p.size - 1 # Don’t hand the terminating NUL
384
-
385
- loop do
386
- @lzma_stream[:next_out] = output_buffer_p
387
- @lzma_stream[:avail_out] = output_buffer_p.size
388
-
389
- # Compress the data
390
- res = XZ::LibLZMA.lzma_code(@lzma_stream.pointer, XZ::LibLZMA::LZMA_ACTION[:lzma_run])
391
- XZ::LZMAError.raise_if_necessary(res) # TODO: Warnings
247
+ # The delegate IO has to support the #rewind method. Otherwise like
248
+ # IO#rewind.
249
+ def rewind
250
+ super
392
251
 
393
- # Write the compressed data
394
- result = output_buffer_p.read_string(output_buffer_p.size - @lzma_stream[:avail_out])
395
- @delegate_io.write(result)
252
+ res = XZ::LibLZMA.lzma_easy_encoder(@lzma_stream.to_ptr,
253
+ @level,
254
+ XZ::LibLZMA.const_get(:"LZMA_CHECK_#{@check.upcase}"))
255
+ XZ::LZMAError.raise_if_necessary(res)
396
256
 
397
- # Loop until liblzma ate the whole data.
398
- break if @lzma_stream[:avail_in] == 0
399
- end
257
+ 0 # Mimic IO#rewind's return value
258
+ end
400
259
 
401
- binary_size(data)
402
- rescue XZ::LZMAError => e
403
- raise(SystemCallError, e.message)
260
+ # Human-readable description
261
+ def inspect
262
+ "<#{self.class} pos=#{@pos} finished=#{@finished} closed=#{closed?} io=#{@delegate_io.inspect}>"
404
263
  end
405
264
 
406
265
  end
data/lib/xz/version.rb CHANGED
@@ -1,10 +1,11 @@
1
+ # frozen_string_literal: true
1
2
  # -*- coding: utf-8 -*-
2
3
  #--
3
- # (The MIT License)
4
- #
5
4
  # Basic liblzma-bindings for Ruby.
6
5
  #
7
- # Copyright © 2015 Marvin Gülker
6
+ # Copyright © 2011-2018 Marvin Gülker et al.
7
+ #
8
+ # See AUTHORS for the full list of contributors.
8
9
  #
9
10
  # Permission is hereby granted, free of charge, to any person obtaining a
10
11
  # copy of this software and associated documentation files (the ‘Software’),
@@ -26,8 +27,6 @@
26
27
  #++
27
28
 
28
29
  module XZ
29
-
30
30
  # The version of this library.
31
- VERSION = "0.2.2".freeze
32
-
31
+ VERSION = '1.0.2'
33
32
  end