ruby-xz 0.2.1 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,9 +1,10 @@
1
1
  # -*- coding: utf-8 -*-
2
- # (The MIT license)
3
- #
2
+ #--
4
3
  # Basic liblzma-bindings for Ruby.
5
4
  #
6
- # Copyright © 2012 Marvin Gülker
5
+ # Copyright © 2011-2018 Marvin Gülker et al.
6
+ #
7
+ # See AUTHORS for the full list of contributors.
7
8
  #
8
9
  # Permission is hereby granted, free of charge, to any person obtaining a
9
10
  # copy of this software and associated documentation files (the ‘Software’),
@@ -22,194 +23,243 @@
22
23
  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
24
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24
25
  # THE SOFTWARE.
26
+ #++
25
27
 
26
- #An IO-like writer class for XZ-compressed data, allowing you to write
27
- #uncompressed data to a stream which ends up as compressed data in
28
- #a wrapped stream such as a file.
29
- #
30
- #A StreamWriter object actually wraps another IO object it writes the
31
- #XZ-compressed data to. Here’s an ASCII art image to demonstrate
32
- #way data flows when using StreamWriter to write to a compressed
33
- #file:
28
+ # An IO-like writer class for XZ-compressed data, allowing you to
29
+ # write uncompressed data to a stream which ends up as compressed data
30
+ # in a wrapped stream such as a file.
34
31
  #
35
- # +----------------+ +------------+
36
- # YOUR =>|StreamWriter's |=>|Wrapped IO's|=> ACTUAL
37
- # DATA =>|internal buffers|=>|buffers |=> FILE
38
- # +----------------+ +------------+
32
+ # A StreamWriter object actually wraps another IO object it writes the
33
+ # XZ-compressed data to. Here’s an ASCII art image to demonstrate the way
34
+ # data flows when using StreamWriter to write to a compressed file:
39
35
  #
40
- #This graphic also illustrates why it is unlikely to see written
41
- #data directly appear in the file on your harddisk; the data is
42
- #cached at least twice before it actually gets written out. Regarding
43
- #file closing that means that before you can be sure any pending data
44
- #has been written to the file you have to close both the StreamWriter
45
- #instance and then the wrapped IO object (in *exactly* that order, otherwise
46
- #data loss and unexpected exceptions may occur!).
36
+ # +-----------------+ +------------+
37
+ # YOUR =>|StreamWriter's |=>|Wrapped IO's|=> ACTUAL
38
+ # DATA =>|(liblzma) buffers|=>|buffers |=> FILE
39
+ # +-----------------+ +------------+
47
40
  #
48
- #As it might be tedious to always remember the correct closing order,
49
- #it’s possible to pass a filename to the ::new method. In this case,
50
- #StreamWriter will open the file internally and also takes care closing
51
- #it when you call the #close method.
41
+ # This graphic also illustrates why it is unlikely to see written data
42
+ # directly appear in the file on your harddisk; the data is cached at
43
+ # least twice before it actually gets written out. Regarding file
44
+ # closing that means that before you can be sure any pending data has
45
+ # been written to the file you have to close both the StreamWriter
46
+ # instance and then the wrapped IO object (in *exactly* that order,
47
+ # otherwise data loss and unexpected exceptions may occur!).
52
48
  #
53
- #See the +io-like+ gem’s documentation for the IO-writing methods
54
- #available for this class (although you’re probably familiar with
55
- #them through Rubys own IO class ;-)).
49
+ # Calling the #close method closes both the XZ writer and the
50
+ # underlying IO object in the correct order. This is akin to the
51
+ # behaviour exposed by Ruby's own Zlib::GzipWriter class. If you
52
+ # expressly don't want to close the underlying IO instance, you need
53
+ # to manually call StreamWriter#finish and never call
54
+ # StreamWriter#close. Instead, you then close your IO object manually
55
+ # using IO#close once you're done with it.
56
56
  #
57
- #==Example
58
- #Together with the <tt>archive-tar-minitar</tt> gem, this library
59
- #can be used to create XZ-compressed TAR archives (these commonly
60
- #use a file extension of <tt>.tar.xz</tt> or rarely <tt>.txz</tt>).
61
- #
62
- # XZ::StreamWriter.open("foo.tar.xz") do |txz|
63
- # # This automatically closes txz
64
- # Archive::Tar::Minitar.pack("foo", txz)
65
- # end
57
+ # *NOTE*: Using #finish inside the +open+ method's block allows
58
+ # you to continue using that writer's File instance as it is
59
+ # returned by #finish.
66
60
  class XZ::StreamWriter < XZ::Stream
67
61
 
68
- #call-seq:
69
- # open(delegate, compression_level = 6, check = :crc64, extreme = false) → a_stream_writer
70
- # new(delegate, compression_level = 6, check = :crc64, extreme = false) → a_stream_writer
71
- #
72
- #Creates a new StreamWriter instance. The block form automatically
73
- #calls the #close method when the block has finished executing.
74
- #==Parameters
75
- #[delegate] An IO object to write the data to or a filename
76
- # which will be opened internally. If you pass an IO,
77
- # the #close method won’t close the passed IO object;
78
- # if you passed a filename, the created internal file
79
- # of course gets closed.
80
- #The other parameters are identical to what the XZ::compress_stream
81
- #method expects.
82
- #==Return value
83
- #The newly created instance.
84
- #==Example
85
- # # Wrap it around a file
86
- # f = File.open("data.xz")
87
- # w = XZ::StreamWriter.new(f)
88
- #
89
- # # Use SHA256 as the checksum and use a higher compression level
90
- # # than the default (6)
91
- # f = File.open("data.xz")
92
- # w = XZ::StreamWriter.new(f, 8, :sha256)
93
- #
94
- # # Instruct liblzma to use ultra-really-high compression
95
- # # (may take eternity)
96
- # f = File.open("data.xz")
97
- # w = XZ::StreamWriter.new(f, 9, :crc64, true)
98
- #
99
- # # Passing a filename
100
- # w = XZ::StreamWriter.new("compressed_data.xz")
101
- def initialize(delegate, compression_level = 6, check = :crc64, extreme = false)
102
- if delegate.respond_to?(:to_io)
103
- super(delegate)
104
- else
105
- @file = File.open(delegate, "wb")
106
- super(@file)
107
- end
62
+ # Compression level used for this writer (set on instantiation).
63
+ attr_reader :level
64
+ # Checksum algorithm in use.
65
+ attr_reader :check
108
66
 
109
- # Initialize the internal LZMA stream for encoding
110
- res = XZ::LibLZMA.lzma_easy_encoder(@lzma_stream.pointer,
111
- compression_level | (extreme ? XZ::LibLZMA::LZMA_PRESET_EXTREME : 0),
112
- XZ::LibLZMA::LZMA_CHECK[:"lzma_check_#{check}"])
113
- XZ::LZMAError.raise_if_necessary(res)
67
+ # call-seq:
68
+ # open(filename [, options ]) → stream_writer
69
+ # open(filename [, options ]){|sw| ...} → stream_writer
70
+ #
71
+ # Creates a new instance for writing to a compressed file. The File
72
+ # instance is opened internally and then wrapped via ::new. The
73
+ # block form automatically closes both the liblzma stream and the
74
+ # internal File instance in the correct order. The non-block form
75
+ # does neither, leaving it to you to call #finish or #close later.
76
+ #
77
+ # === Parameters
78
+ # [filename]
79
+ # The file to open.
80
+ # [sw (block argument)]
81
+ # The created StreamWriter instance.
82
+ #
83
+ # See ::new for the other parameters.
84
+ #
85
+ # === Return value
86
+ # Returns the newly created instance.
87
+ #
88
+ # === Remarks
89
+ # Starting with version 1.0.0, the block form also returns the newly
90
+ # created instance rather than the block's return value. This is
91
+ # in line with Ruby's own GzipWriter.open API.
92
+ #
93
+ # === Example
94
+ # # Normal usage
95
+ # XZ::StreamWriter.open("myfile.txt.xz") do |xz|
96
+ # xz.puts "Compress this line"
97
+ # xz.puts "And this line as well"
98
+ # end
99
+ #
100
+ # # If for whatever reason you want to do something else with
101
+ # # the internally opened file:
102
+ # file = nil
103
+ # XZ::StreamWriter.open("myfile.txt.xz") do |xz|
104
+ # xz.puts "Compress this line"
105
+ # xz.puts "And this line as well"
106
+ # file = xz.finish
107
+ # end
108
+ # # At this point, the liblzma stream has been closed, but `file'
109
+ # # now contains the internally created File instance, which is
110
+ # # still open. Don't forget to close it yourself at some point
111
+ # # to flush it.
112
+ # file.close
113
+ #
114
+ # # Or just don't use the block form:
115
+ # xz = XZ::StreamWriter.open("myfile.txt.xz")
116
+ # xz.puts "Compress this line"
117
+ # xz.puts "And this line as well"
118
+ # file = xz.finish
119
+ # file.close # Don't forget to close it manually (or use xz.close instead of xz.finish above)
120
+ def self.open(filename, **args)
121
+ file = File.open(filename, "wb")
122
+ writer = new(file, **args)
114
123
 
115
124
  if block_given?
116
125
  begin
117
- yield(self)
126
+ yield(writer)
118
127
  ensure
119
- close unless closed?
128
+ # Close both writer and delegate IO via writer.close
129
+ # unless the writer has manually been finished (usually
130
+ # not closing the delegate IO then).
131
+ writer.close unless writer.finished?
120
132
  end
121
133
  end
122
- end
123
- self.class.send(:alias_method, :open, :new)
124
-
125
- #Closes this StreamWriter instance and flushes all internal buffers.
126
- #Don’t use it afterwards anymore.
127
- #==Return vaule
128
- #The total number of bytes written, i.e. the size of the compressed
129
- #data.
130
- #==Example
131
- # w.close #=> 424
132
- #==Remarks
133
- #If you passed an IO object to ::new, this method doesn’t close it,
134
- #you have to do that yourself.
135
- def close
136
- super
137
134
 
138
- #1. Close the current block ("file") (an XZ stream may actually include
139
- # multiple compressed files, which however is not supported by
140
- # this library). For this we have to tell liblzma that
141
- # the next bytes we pass to it are the last bytes (by means of
142
- # the FINISH action). Just that we don’t pass any new input ;-)
143
-
144
- output_buffer_p = FFI::MemoryPointer.new(XZ::CHUNK_SIZE)
135
+ writer
136
+ end
145
137
 
146
- # Get any pending data (LZMA_FINISH causes libzlma to flush its
147
- # internal buffers) and write it out to our wrapped IO.
148
- loop do
149
- @lzma_stream[:next_out] = output_buffer_p
150
- @lzma_stream[:avail_out] = output_buffer_p.size
138
+ # Creates a new instance that is wrapped around the given IO instance.
139
+ #
140
+ # === Parameters
141
+ # ==== Positional parameters
142
+ # [delegate_io]
143
+ # The IO instance to wrap. It has to be opened in binary mode,
144
+ # otherwise the data it writes to the hard disk will be corrupt.
145
+ #
146
+ # ==== Keyword arguments
147
+ # [:level (6)]
148
+ # Compression strength. Higher values indicate a
149
+ # smaller result, but longer compression time. Maximum
150
+ # is 9.
151
+ # [:check (:crc64)]
152
+ # The checksum algorithm to use for verifying
153
+ # the data inside the archive. Possible values are:
154
+ # * :none
155
+ # * :crc32
156
+ # * :crc64
157
+ # * :sha256
158
+ # [:extreme (false)]
159
+ # Tries to get the last bit out of the
160
+ # compression. This may succeed, but you can end
161
+ # up with *very* long computation times.
162
+ # [:external_encoding (Encoding.default_external)]
163
+ # Transcode to this encoding when writing. Defaults
164
+ # to Encoding.default_external, which by default is
165
+ # set from the environment.
166
+ #
167
+ # === Return value
168
+ # Returns the newly created instance.
169
+ #
170
+ # === Remarks
171
+ # This method does not close the underlying IO nor does it automatically
172
+ # flush liblzma. You'll need to do that manually using #close or #finish.
173
+ # See ::open for a method that supports a block with auto-closing.
174
+ #
175
+ # This method used to accept a block in earlier versions. This
176
+ # behaviour has been removed in version 1.0.0 to synchronise the API
177
+ # with Ruby's own GzipWriter.new.
178
+ #
179
+ # === Example
180
+ # # Normal usage:
181
+ # file = File.open("myfile.txt.xz", "wb") # Note binary mode
182
+ # xz = XZ::StreamWriter.new(file)
183
+ # xz.puts("Compress this line")
184
+ # xz.puts("And this second line")
185
+ # xz.close # Closes both the liblzma stream and `file'
186
+ #
187
+ # # Expressly closing the delegate IO manually:
188
+ # File.open("myfile.txt.xz", "wb") do |file| # Note binary mode
189
+ # xz = XZ::StreamWriter.new(file)
190
+ # xz.puts("Compress this line")
191
+ # xz.puts("And this second line")
192
+ # xz.finish # Flushes liblzma, but keeps `file' open.
193
+ # end # Here, `file' is closed.
194
+ def initialize(delegate_io, level: 6, check: :crc64, extreme: false, external_encoding: nil)
195
+ super(delegate_io)
151
196
 
152
- res = XZ::LibLZMA.lzma_code(@lzma_stream.pointer, XZ::LibLZMA::LZMA_ACTION[:lzma_finish])
153
- XZ::LZMAError.raise_if_necessary(res)
197
+ raise(ArgumentError, "Invalid compression level!") unless (0..9).include?(level)
198
+ raise(ArgumentError, "Invalid checksum specified!") unless [:none, :crc32, :crc64, :sha256].include?(check)
154
199
 
155
- @delegate_io.write(output_buffer_p.read_string(output_buffer_p.size - @lzma_stream[:avail_out]))
200
+ set_encoding(external_encoding) if external_encoding
156
201
 
157
- break unless @lzma_stream[:avail_out] == 0
158
- end
202
+ @check = check
203
+ @level = level
204
+ @level |= LibLZMA::LZMA_PRESET_EXTREME if extreme
159
205
 
160
- #2. Close the whole XZ stream.
161
- res = XZ::LibLZMA.lzma_end(@lzma_stream.pointer)
206
+ res = XZ::LibLZMA.lzma_easy_encoder(@lzma_stream.to_ptr,
207
+ @level,
208
+ XZ::LibLZMA.const_get(:"LZMA_CHECK_#{@check.upcase}"))
162
209
  XZ::LZMAError.raise_if_necessary(res)
210
+ end
163
211
 
164
- #2b. If we wrapped a file automatically, close it.
165
- @file.close if @file
212
+ # Mostly like IO#write. Additionally it raises an IOError
213
+ # if #finish has been called previously.
214
+ def write(*args)
215
+ raise(IOError, "Cannot write to a finished liblzma stream") if @finished
166
216
 
167
- #3. Return the number of bytes written in total.
168
- @lzma_stream[:total_out]
169
- end
217
+ origpos = @pos
170
218
 
171
- #call-seq:
172
- # pos() → an_integer
173
- # tell() → an_integer
174
- #
175
- #Total number of input bytes read so far from what you
176
- #supplied to any writer method.
177
- def pos
178
- @lzma_stream[:total_in]
179
- end
180
- alias tell pos
219
+ args.each do |arg|
220
+ @pos += arg.to_s.bytesize
181
221
 
182
- private
222
+ # Apply external encoding if requested
223
+ if @external_encoding && @external_encoding != Encoding::BINARY
224
+ arg = arg.to_s.encode(@external_encoding)
225
+ end
183
226
 
184
- #Called by io-like’s write methods such as #write. Does the heavy
185
- #work of feeding liblzma the uncompressed data and reading the
186
- #returned compressed data.
187
- def unbuffered_write(data)
188
- output_buffer_p = FFI::MemoryPointer.new(XZ::CHUNK_SIZE)
189
- input_buffer_p = FFI::MemoryPointer.from_string(data) # This adds a terminating NUL byte we don’t want to compress!
227
+ lzma_code(arg.to_s, XZ::LibLZMA::LZMA_RUN) do |compressed|
228
+ @delegate_io.write(compressed)
229
+ end
230
+ end
190
231
 
191
- @lzma_stream[:next_in] = input_buffer_p
192
- @lzma_stream[:avail_in] = input_buffer_p.size - 1 # Don’t hand the terminating NUL
232
+ @pos - origpos # Return number of bytes consumed from input
233
+ end
193
234
 
194
- loop do
195
- @lzma_stream[:next_out] = output_buffer_p
196
- @lzma_stream[:avail_out] = output_buffer_p.size
235
+ # Like superclass' method, but also ensures liblzma flushes all
236
+ # compressed data to the delegate IO.
237
+ def finish
238
+ lzma_code("", XZ::LibLZMA::LZMA_FINISH) { |compressed| @delegate_io.write(compressed) }
239
+ super
240
+ end
197
241
 
198
- # Compress the data
199
- res = XZ::LibLZMA.lzma_code(@lzma_stream.pointer, XZ::LibLZMA::LZMA_ACTION[:lzma_run])
200
- XZ::LZMAError.raise_if_necessary(res) # TODO: Warnings
242
+ # Abort the current compression process and reset everything
243
+ # to the start. Writing into this writer will cause existing data
244
+ # on the underlying IO to be overwritten after this method has been
245
+ # called.
246
+ #
247
+ # The delegate IO has to support the #rewind method. Otherwise like
248
+ # IO#rewind.
249
+ def rewind
250
+ super
201
251
 
202
- # Write the compressed data
203
- result = output_buffer_p.read_string(output_buffer_p.size - @lzma_stream[:avail_out])
204
- @delegate_io.write(result)
252
+ res = XZ::LibLZMA.lzma_easy_encoder(@lzma_stream.to_ptr,
253
+ @level,
254
+ XZ::LibLZMA.const_get(:"LZMA_CHECK_#{@check.upcase}"))
255
+ XZ::LZMAError.raise_if_necessary(res)
205
256
 
206
- # Loop until liblzma ate the whole data.
207
- break if @lzma_stream[:avail_in] == 0
208
- end
257
+ 0 # Mimic IO#rewind's return value
258
+ end
209
259
 
210
- binary_size(data)
211
- rescue XZ::LZMAError => e
212
- raise(SystemCallError, e.message)
260
+ # Human-readable description
261
+ def inspect
262
+ "<#{self.class} pos=#{@pos} finished=#{@finished} closed=#{closed?} io=#{@delegate_io.inspect}>"
213
263
  end
214
264
 
215
265
  end
data/lib/xz/version.rb ADDED
@@ -0,0 +1,33 @@
1
+ # -*- coding: utf-8 -*-
2
+ #--
3
+ # Basic liblzma-bindings for Ruby.
4
+ #
5
+ # Copyright © 2011-2018 Marvin Gülker et al.
6
+ #
7
+ # See AUTHORS for the full list of contributors.
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining a
10
+ # copy of this software and associated documentation files (the ‘Software’),
11
+ # to deal in the Software without restriction, including without limitation
12
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
13
+ # and/or sell copies of the Software, and to permit persons to whom the Software
14
+ # is furnished to do so, subject to the following conditions:
15
+ #
16
+ # The above copyright notice and this permission notice shall be included in all
17
+ # copies or substantial portions of the Software.
18
+ #
19
+ # THE SOFTWARE IS PROVIDED ‘AS IS’, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
+ # THE SOFTWARE.
26
+ #++
27
+
28
+ module XZ
29
+
30
+ # The version of this library.
31
+ VERSION = "1.0.1".freeze
32
+
33
+ end