ruby-xz 0.2.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/AUTHORS +1 -3
- data/HISTORY.rdoc +45 -0
- data/LICENSE +21 -0
- data/README.md +44 -43
- data/lib/xz.rb +163 -87
- data/lib/xz/fiddle_helper.rb +91 -0
- data/lib/xz/lib_lzma.rb +117 -103
- data/lib/xz/stream.rb +429 -32
- data/lib/xz/stream_reader.rb +221 -400
- data/lib/xz/stream_writer.rb +173 -314
- data/lib/xz/version.rb +4 -4
- metadata +17 -43
- data/COPYING +0 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 820fac81115af7c562998d1c80cce241d045c210
|
4
|
+
data.tar.gz: 31823023e5de3c452f8d0b82b643732ff0294658
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35aa3ebe08c3e82b4cc81339c23af1d78073ffb4d174f56079808f482ae60061c329aeed2fcbec56eb34e81102ed30bb3f32240f85436d5b99142df48e925f29
|
7
|
+
data.tar.gz: a83b6fc3ca7b397af5695877fde63ff8642fa60a689d36b505cbf08717ea2b4c22b182e484bd55aa66a49f90fb132da17e7cd7a2caf1250175590127fb7f0565
|
data/AUTHORS
CHANGED
@@ -2,7 +2,5 @@
|
|
2
2
|
|
3
3
|
All the people who worked on this project, in alphabetical order.
|
4
4
|
|
5
|
-
*
|
6
|
-
* Marvin Gülker (Quintus) <quintus ät quintilianus döt eu>
|
5
|
+
* Marvin Gülker (Quintus) <m-guelker@phoenixmail.de>
|
7
6
|
* Christoph Plank (chrisistuff)
|
8
|
-
* Nana Sakisaka (saki7)
|
data/HISTORY.rdoc
CHANGED
@@ -1,5 +1,50 @@
|
|
1
1
|
= Version history
|
2
2
|
|
3
|
+
== 1.0.0 (2018-05-20)
|
4
|
+
|
5
|
+
* *BreakingChange* The XZ module's methods now take any parameters
|
6
|
+
beyond the IO object as real Ruby keyword arguments rather than
|
7
|
+
a long argument list.
|
8
|
+
* *BreakingChange* XZ.decompress_stream now honours Ruby's
|
9
|
+
external and internal encoding concept instead of just
|
10
|
+
returning BINARY-tagged strings.
|
11
|
+
* *BreakingChange* Remove deprecated API on stream reader/writer
|
12
|
+
class and instead sync the API with Ruby's zlib library
|
13
|
+
(Ticket #12 by me).
|
14
|
+
* *BreakingChange* StreamWriter.new and StreamReader.new do not accept
|
15
|
+
a block anymore. This is part of syncing with Ruby's zlib API.
|
16
|
+
* *BreakingChange* StreamReader.open and StreamWriter.open always
|
17
|
+
return the new instance, even if a block is given to the method
|
18
|
+
(previous behaviour was to return the return value of the block).
|
19
|
+
This is part of the syncing with Ruby's zlib API.
|
20
|
+
* *BreakingChange* StreamReader.new and StreamWriter.new as well as
|
21
|
+
the ::open variants take additional arguments as real Ruby keyword
|
22
|
+
arguments now instead of a long parameter list plus options hash.
|
23
|
+
This is different from Ruby's own zlib API as that one takes both
|
24
|
+
a long parameter list and a hash of additional options. ruby-xz
|
25
|
+
is meant to follow zlib's semantics mostly, but not as a drop-in
|
26
|
+
replacement, so this divergence from zlib's API is okay (also
|
27
|
+
given that it isn't possible to replicate all possible options
|
28
|
+
1:1 anyway, since liblzma simply accepts different options than
|
29
|
+
libz). If you've never used these methods' optional arguments,
|
30
|
+
you should be fine.
|
31
|
+
* *BreakingChange* Stream#close now returns nil instead of the
|
32
|
+
number of bytes written. This syncs Stream#close with Ruby's
|
33
|
+
own IO#close, which also returns nil.
|
34
|
+
* *BreakingChange* Remove Stream#pos=, Stream#seek, Stream#stat. These
|
35
|
+
methods irritated the minitar gem, which doesn't expect them to
|
36
|
+
raise NotImplementedError, but directly to be missing if the object
|
37
|
+
does not support seeking.
|
38
|
+
* *BreakingChange* StreamReader and StreamWriter now honour Ruby's
|
39
|
+
encoding system instead of returning only BINARY-tagged strings.
|
40
|
+
* *Dependency* Remove dependency on ffi. ruby-xz now uses fiddle from
|
41
|
+
the stdlib instead.
|
42
|
+
* *Dependency* Remove dependency on io-like. ruby-xz now implements
|
43
|
+
all the IO mechanics itself. (Ticket #10 by me)
|
44
|
+
* *Dependency* Bump required Ruby version to 2.3.0.
|
45
|
+
* *Fix* liblzma.dylib not being found on OS X (Ticket #15 by
|
46
|
+
s0nspark).
|
47
|
+
|
3
48
|
== 0.2.3 (2015-12-29)
|
4
49
|
|
5
50
|
* *Fix* documentation of XZ module (a :nodoc: was causing havoc
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright © 2011-2018 Marvin Gülker et al.
|
2
|
+
|
3
|
+
See AUTHORS for the full list of contributors.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a
|
6
|
+
copy of this software and associated documentation files (the ‘Software’),
|
7
|
+
to deal in the Software without restriction, including without limitation
|
8
|
+
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
9
|
+
and/or sell copies of the Software, and to permit persons to whom the Software
|
10
|
+
is furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED ‘AS IS’, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
CHANGED
@@ -5,16 +5,20 @@ ruby-xz
|
|
5
5
|
best known for the extreme compression-ratio its native *XZ* format
|
6
6
|
achieves. ruby-xz gives you the possibility of creating and extracting
|
7
7
|
XZ archives on any platform where liblzma is installed. No compilation
|
8
|
-
is needed, because ruby-xz is written
|
9
|
-
|
8
|
+
is needed, because ruby-xz is written on top of Ruby's “fiddle” library
|
9
|
+
(part of the standard library). ruby-xz does not have any dependencies
|
10
|
+
other than Ruby itself.
|
10
11
|
|
11
|
-
ruby-xz supports both
|
12
|
+
ruby-xz supports both “intuitive” (de)compression by providing methods to
|
12
13
|
directly operate on strings and files, but also allows you to operate
|
13
14
|
directly on IO streams (see the various methods of the XZ module). On top
|
14
15
|
of that, ruby-xz offers an advanced interface that allows you to treat
|
15
16
|
XZ-compressed data as IO streams, both for reading and for writing. See the
|
16
17
|
XZ::StreamReader and XZ::StreamWriter classes for more information on this.
|
17
18
|
|
19
|
+
**Note**: Version 1.0.0 breaks the API quite heavily. Refer to
|
20
|
+
HISTORY.rdoc for details.
|
21
|
+
|
18
22
|
Installation
|
19
23
|
------------
|
20
24
|
|
@@ -28,7 +32,7 @@ Alternatively, you can clone the repository and build the most recent
|
|
28
32
|
code yourself:
|
29
33
|
|
30
34
|
```
|
31
|
-
$ git clone git://
|
35
|
+
$ git clone git://git.guelker.eu/ruby-xz.git
|
32
36
|
$ cd ruby-xz
|
33
37
|
$ rake gem
|
34
38
|
$ gem install pkg/ruby-xz-*.gem
|
@@ -42,26 +46,23 @@ everything you need to use ruby-xz. As said, it's not big, but powerful:
|
|
42
46
|
You can create and extract whole archive files, compress or decompress
|
43
47
|
streams of data or just plain strings.
|
44
48
|
|
45
|
-
You can read the documentation on your local gemserver, or browse it [online][
|
49
|
+
You can read the documentation on your local gemserver, or browse it [online][2].
|
46
50
|
|
47
|
-
###
|
51
|
+
### Require ###
|
48
52
|
|
49
|
-
You have to require
|
50
|
-
"xz.rb", so do
|
53
|
+
You have to require the “xz.rb” file:
|
51
54
|
|
52
55
|
``` ruby
|
53
56
|
require "xz"
|
54
57
|
```
|
55
58
|
|
56
|
-
to get it.
|
57
|
-
|
58
59
|
### Examples ###
|
59
60
|
|
60
61
|
``` ruby
|
61
|
-
# Compress a
|
62
|
-
XZ.compress_file("myfile.
|
62
|
+
# Compress a file
|
63
|
+
XZ.compress_file("myfile.txt", "myfile.txt.xz")
|
63
64
|
# Decompress it
|
64
|
-
XZ.decompress_file("myfile.
|
65
|
+
XZ.decompress_file("myfile.txt.xz", "myfile.txt")
|
65
66
|
|
66
67
|
# Compress everything you get from a socket (note that there HAS to be an EOF
|
67
68
|
# sometime, otherwise this will run infinitely)
|
@@ -76,42 +77,42 @@ data = XZ.decompress(comp)
|
|
76
77
|
Have a look at the XZ module's documentation for an in-depth description of
|
77
78
|
what is possible.
|
78
79
|
|
79
|
-
|
80
|
-
-----
|
81
|
-
|
82
|
-
* Code repository: https://github.com/Quintus/ruby-xz
|
83
|
-
* Issue tracker: https://github.com/Quintus/ruby-xz/issues
|
84
|
-
* Online documentation: http://quintus.github.io/ruby-xz
|
80
|
+
### Usage with the minitar gem ###
|
85
81
|
|
86
|
-
|
87
|
-
|
82
|
+
ruby-xz can be used together with the [minitar][3] library (formerly
|
83
|
+
“archive-tar-minitar”) to create XZ-compressed tarballs. This works by
|
84
|
+
employing the IO-like classes XZ::StreamReader and XZ::StreamWriter
|
85
|
+
analogous to how one would use Ruby's “zlib” library together with
|
86
|
+
“minitar”. Example:
|
88
87
|
|
89
|
-
|
88
|
+
``` ruby
|
89
|
+
require "xz"
|
90
|
+
require "minitar"
|
90
91
|
|
91
|
-
|
92
|
+
# Create an XZ-compressed tarball
|
93
|
+
XZ::StreamWriter.open("tarball.tar.xz") do |txz|
|
94
|
+
Minitar.pack("path/to/directory", txz)
|
95
|
+
end
|
92
96
|
|
93
|
-
|
97
|
+
# Unpack it again
|
98
|
+
XZ::StreamReader.open("tarball.tar.xz") do |txz|
|
99
|
+
Minitar.unpack(txz, "path/to/target/directory")
|
100
|
+
end
|
101
|
+
```
|
94
102
|
|
95
|
-
|
103
|
+
Links
|
104
|
+
-----
|
96
105
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
and/or sell copies of the Software, and to permit persons to whom the Software
|
102
|
-
is furnished to do so, subject to the following conditions:
|
106
|
+
* Website: https://mg.guelker.eu/projects/ruby-xz/
|
107
|
+
* Online documentation: https://mg.guelker.eu/projects/ruby-xz/doc
|
108
|
+
* Code repository: https://git.guelker.eu/?p=ruby-xz.git;a=summary
|
109
|
+
* Issue tracker: https://github.com/Quintus/ruby-xz/issues
|
103
110
|
|
104
|
-
|
105
|
-
|
111
|
+
License
|
112
|
+
-------
|
106
113
|
|
107
|
-
|
108
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
109
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
110
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
111
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
112
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
113
|
-
THE SOFTWARE.
|
114
|
+
MIT license; see LICENSE for the full license text.
|
114
115
|
|
115
|
-
[1]:
|
116
|
-
[2]: https://
|
117
|
-
[3]:
|
116
|
+
[1]: https://tukaani.org/xz/
|
117
|
+
[2]: https://mg.guelker.eu/projects/ruby-xz/doc
|
118
|
+
[3]: https://github.com/halostatue/minitar
|
data/lib/xz.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
#--
|
3
|
-
# (The MIT License)
|
4
|
-
#
|
5
3
|
# Basic liblzma-bindings for Ruby.
|
6
4
|
#
|
7
|
-
# Copyright © 2011
|
8
|
-
#
|
5
|
+
# Copyright © 2011-2018 Marvin Gülker et al.
|
6
|
+
#
|
7
|
+
# See AUTHORS for the full list of contributors.
|
9
8
|
#
|
10
9
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
11
10
|
# copy of this software and associated documentation files (the ‘Software’),
|
@@ -27,20 +26,14 @@
|
|
27
26
|
#++
|
28
27
|
|
29
28
|
require "pathname"
|
30
|
-
require "
|
31
|
-
require
|
32
|
-
require "
|
29
|
+
require "fiddle"
|
30
|
+
require "fiddle/import"
|
31
|
+
require "stringio"
|
32
|
+
require "forwardable"
|
33
33
|
|
34
34
|
# The namespace and main module of this library. Each method of this
|
35
35
|
# module may raise exceptions of class XZ::LZMAError, which is not
|
36
36
|
# named in the methods' documentations anymore.
|
37
|
-
#
|
38
|
-
# All strings you receive from any method defined in this module and
|
39
|
-
# the classes defined in it are encoded in BINARY, so you may have to
|
40
|
-
# call #force_encoding on them to tag them with the correct encoding
|
41
|
-
# (assuming you _know_ what their correct encoding should be).
|
42
|
-
# ruby-xz can’t handle this as compiled strings don’t come with
|
43
|
-
# encoding information.
|
44
37
|
module XZ
|
45
38
|
|
46
39
|
# Number of bytes read in one chunk.
|
@@ -68,17 +61,24 @@ module XZ
|
|
68
61
|
end
|
69
62
|
|
70
63
|
# call-seq:
|
71
|
-
# decompress_stream(io [,
|
72
|
-
# decompress_stream(io [,
|
73
|
-
# decode_stream(io [,
|
74
|
-
# decode_stream(io [,
|
64
|
+
# decompress_stream(io [, kw ] ) → a_string
|
65
|
+
# decompress_stream(io [, kw ] ){|chunk| ... } → an_integer
|
66
|
+
# decode_stream(io [, kw ] ) → a_string
|
67
|
+
# decode_stream(io [, kw ] ){|chunk| ... } → an_integer
|
75
68
|
#
|
76
69
|
# Decompresses a stream containing XZ-compressed data.
|
77
70
|
#
|
78
71
|
# === Parameters
|
72
|
+
# ==== Positional parameters
|
79
73
|
#
|
80
74
|
# [io]
|
81
|
-
# The IO to read from. It must be opened for reading
|
75
|
+
# The IO to read from. It must be opened for reading in
|
76
|
+
# binary mode.
|
77
|
+
# [chunk (Block argument)]
|
78
|
+
# One piece of decompressed data. See Remarks section below
|
79
|
+
# for information about its encoding.
|
80
|
+
#
|
81
|
+
# ==== Keyword arguments
|
82
82
|
#
|
83
83
|
# [memory_limit (+UINT64_MAX+)]
|
84
84
|
# If not XZ::LibLZMA::UINT64_MAX, makes liblzma
|
@@ -96,9 +96,13 @@ module XZ
|
|
96
96
|
# has an unsupported checksum type.
|
97
97
|
# [:concatenated]
|
98
98
|
# Decompress concatenated archives.
|
99
|
-
#
|
100
|
-
#
|
101
|
-
#
|
99
|
+
# [external_encoding (Encoding.default_external)]
|
100
|
+
# Assume the decompressed data inside the compressed data
|
101
|
+
# has this encoding. See Remarks section.
|
102
|
+
# [internal_encoding (Encoding.default_internal)]
|
103
|
+
# Request transcoding of the decompressed data into this
|
104
|
+
# encoding if not nil. Note that Encoding.default_internal
|
105
|
+
# is nil by default. See Remarks section.
|
102
106
|
#
|
103
107
|
# === Return value
|
104
108
|
#
|
@@ -106,6 +110,18 @@ module XZ
|
|
106
110
|
# written. Otherwise, returns the decompressed data as a
|
107
111
|
# BINARY-encoded string.
|
108
112
|
#
|
113
|
+
# === Raises
|
114
|
+
#
|
115
|
+
# [Encoding::InvalidByteSequenceError]
|
116
|
+
# 1. You requested an “internal encoding” conversion
|
117
|
+
# and the archive contains invalid byte sequences
|
118
|
+
# in the external encoding.
|
119
|
+
# 2. You requested an “internal encoding” conversion, used
|
120
|
+
# the block form of this method, and liblzma decided
|
121
|
+
# to cut the decompressed data into chunks in the middle of
|
122
|
+
# a multibyte character. See Remarks section for an
|
123
|
+
# explanation.
|
124
|
+
#
|
109
125
|
# === Example
|
110
126
|
#
|
111
127
|
# data = File.open("archive.xz", "rb"){|f| f.read}
|
@@ -125,8 +141,52 @@ module XZ
|
|
125
141
|
# know how big your data gets or if you want to decompress much
|
126
142
|
# data, use the block form. Of course you shouldn't store the data
|
127
143
|
# you read in RAM then as in the example above.
|
128
|
-
|
144
|
+
#
|
145
|
+
# This method honours Ruby's external and internal encoding concept.
|
146
|
+
# All documentation about this applies to this method, with the
|
147
|
+
# exception that the external encoding does not refer to the data
|
148
|
+
# on the hard disk (that's compressed XZ data, it's always binary),
|
149
|
+
# but to the data inside the XZ container, i.e. to the *decompressed*
|
150
|
+
# data. Any strings you receive from this method (regardless of
|
151
|
+
# whether via return value or via the +chunk+ block argument) will
|
152
|
+
# first be tagged with the external encoding. If you set an internal
|
153
|
+
# encoding (either via the +internal_encoding+ parameter or via
|
154
|
+
# Ruby's default internal encoding) that string will be transcoded
|
155
|
+
# from the external encoding to the internal encoding before you
|
156
|
+
# even see it; in that case, the return value or chunk block argument
|
157
|
+
# will be encoded in the internal encoding. Internal encoding is
|
158
|
+
# disabled in Ruby by default and the argument for this method also
|
159
|
+
# defaults to nil.
|
160
|
+
#
|
161
|
+
# Due to the external encoding being applied, it can happen that
|
162
|
+
# +chunk+ contains an incomplete multibyte character causing
|
163
|
+
# <tt>valid_encoding?</tt> to return false if called on +chunk+,
|
164
|
+
# because liblzma doesn't know about encodings. The rest of the
|
165
|
+
# character will be yielded to the block in the next iteration
|
166
|
+
# then as liblzma progresses with the decompression from the XZ
|
167
|
+
# format. In other words, be prepared that +chunk+ can contain
|
168
|
+
# incomplete multibyte chars.
|
169
|
+
#
|
170
|
+
# This can have nasty side effects if you requested an internal
|
171
|
+
# encoding automatic transcoding and used the block form. Since
|
172
|
+
# this method applies the internal encoding transcoding before the
|
173
|
+
# chunk is yielded to the block, String#encode gets the incomplete
|
174
|
+
# multibyte character. In that case, you will receive an
|
175
|
+
# Encoding::InvalidByteSequenceError exception even though your
|
176
|
+
# data is perfectly well-formed inside the XZ data. It's just
|
177
|
+
# that liblzma during decompression cut the chunks at an
|
178
|
+
# unfortunate place. To avoid this, do not request internal encoding
|
179
|
+
# conversion when using the block form, but instead transcode
|
180
|
+
# the data manually after you have decompressed the entire data.
|
181
|
+
def decompress_stream(io, memory_limit: LibLZMA::UINT64_MAX, flags: [:tell_unsupported_check], external_encoding: nil, internal_encoding: nil, &block)
|
129
182
|
raise(ArgumentError, "Invalid memory limit set!") unless memory_limit > 0 && memory_limit <= LibLZMA::UINT64_MAX
|
183
|
+
raise(ArgumentError, "external_encoding must be set if internal_encoding transcoding is requested") if internal_encoding && !external_encoding
|
184
|
+
|
185
|
+
# The ArgumentError above is only about the concrete arguments
|
186
|
+
# (to sync with Ruby's IO API), not about the implied internal
|
187
|
+
# encoding, which might still kick in (and does, see below).
|
188
|
+
external_encoding ||= Encoding.default_external
|
189
|
+
internal_encoding ||= Encoding.default_internal
|
130
190
|
|
131
191
|
# bit-or all flags
|
132
192
|
allflags = flags.inject(0) do |val, flag|
|
@@ -134,8 +194,9 @@ module XZ
|
|
134
194
|
val | flag
|
135
195
|
end
|
136
196
|
|
137
|
-
stream = LZMAStream.
|
138
|
-
|
197
|
+
stream = LibLZMA::LZMAStream.malloc
|
198
|
+
LibLZMA.LZMA_STREAM_INIT(stream)
|
199
|
+
res = LibLZMA.lzma_stream_decoder(stream.to_ptr,
|
139
200
|
memory_limit,
|
140
201
|
allflags)
|
141
202
|
|
@@ -144,32 +205,46 @@ module XZ
|
|
144
205
|
res = ""
|
145
206
|
res.encode!(Encoding::BINARY)
|
146
207
|
if block_given?
|
147
|
-
res = lzma_code(io, stream
|
208
|
+
res = lzma_code(io, stream) do |chunk|
|
209
|
+
chunk = chunk.dup # Do not write somewhere into the fiddle pointer while encoding (-> can segfault)
|
210
|
+
chunk.force_encoding(external_encoding) if external_encoding
|
211
|
+
chunk.encode!(internal_encoding) if internal_encoding
|
212
|
+
yield(chunk)
|
213
|
+
end
|
148
214
|
else
|
149
215
|
lzma_code(io, stream){|chunk| res << chunk}
|
216
|
+
res.force_encoding(external_encoding) if external_encoding
|
217
|
+
res.encode!(internal_encoding) if internal_encoding
|
150
218
|
end
|
151
219
|
|
152
|
-
LibLZMA.lzma_end(stream.
|
220
|
+
LibLZMA.lzma_end(stream.to_ptr)
|
153
221
|
|
154
|
-
block_given? ? stream
|
222
|
+
block_given? ? stream.total_out : res
|
155
223
|
end
|
156
224
|
alias decode_stream decompress_stream
|
157
225
|
|
158
226
|
# call-seq:
|
159
|
-
# compress_stream(io [,
|
160
|
-
# compress_stream(io [,
|
161
|
-
# encode_stream(io [,
|
162
|
-
# encode_stream(io [,
|
227
|
+
# compress_stream(io [, kw ] ) → a_string
|
228
|
+
# compress_stream(io [, kw ] ){|chunk| ... } → an_integer
|
229
|
+
# encode_stream(io [, kw ] ) → a_string
|
230
|
+
# encode_stream(io [, kw ] ){|chunk| ... } → an_integer
|
163
231
|
#
|
164
232
|
# Compresses a stream of data into XZ-compressed data.
|
165
233
|
#
|
166
234
|
# === Parameters
|
235
|
+
# ==== Positional arguments
|
167
236
|
#
|
168
237
|
# [io]
|
169
238
|
# The IO to read the data from. Must be opened for
|
170
239
|
# reading.
|
240
|
+
# [chunk (Block argument)]
|
241
|
+
# One piece of compressed data. This is always tagged
|
242
|
+
# as a BINARY string, since it's compressed binary data.
|
171
243
|
#
|
172
|
-
#
|
244
|
+
# ==== Keyword arguments
|
245
|
+
# All keyword arguments are optional.
|
246
|
+
#
|
247
|
+
# [level (6)]
|
173
248
|
# Compression strength. Higher values indicate a
|
174
249
|
# smaller result, but longer compression time. Maximum
|
175
250
|
# is 9.
|
@@ -187,9 +262,6 @@ module XZ
|
|
187
262
|
# compression. This may succeed, but you can end
|
188
263
|
# up with *very* long computation times.
|
189
264
|
#
|
190
|
-
# [chunk (Block argument)]
|
191
|
-
# One piece of compressed data.
|
192
|
-
#
|
193
265
|
# === Return value
|
194
266
|
#
|
195
267
|
# If a block was given, returns the number of bytes
|
@@ -204,7 +276,9 @@ module XZ
|
|
204
276
|
# i.rewind
|
205
277
|
# str = ""
|
206
278
|
#
|
207
|
-
# XZ.compress_stream(i, 4, :sha256)
|
279
|
+
# XZ.compress_stream(i, level: 4, check: :sha256) do |c|
|
280
|
+
# str << c
|
281
|
+
# end #=> 123
|
208
282
|
# str #=> Some binary blob
|
209
283
|
#
|
210
284
|
# === Remarks
|
@@ -214,16 +288,23 @@ module XZ
|
|
214
288
|
# know how big your data gets or if you want to compress much
|
215
289
|
# data, use the block form. Of course you shouldn't store the data
|
216
290
|
# you read in RAM then as in the example above.
|
217
|
-
|
218
|
-
|
291
|
+
#
|
292
|
+
# For the +io+ object passed Ruby's normal external and internal
|
293
|
+
# encoding rules apply while it is read from by this method. These
|
294
|
+
# encodings are not changed on +io+ by this method. The data you
|
295
|
+
# receive in the block (+chunk+) above is binary data (compressed
|
296
|
+
# data) and as such encoded as BINARY.
|
297
|
+
def compress_stream(io, level: 6, check: :crc64, extreme: false, &block)
|
298
|
+
raise(ArgumentError, "Invalid compression level!") unless (0..9).include?(level)
|
219
299
|
raise(ArgumentError, "Invalid checksum specified!") unless [:none, :crc32, :crc64, :sha256].include?(check)
|
220
300
|
|
221
|
-
|
301
|
+
level |= LibLZMA::LZMA_PRESET_EXTREME if extreme
|
222
302
|
|
223
|
-
stream = LZMAStream.
|
224
|
-
|
225
|
-
|
226
|
-
|
303
|
+
stream = LibLZMA::LZMAStream.malloc
|
304
|
+
LibLZMA::LZMA_STREAM_INIT(stream)
|
305
|
+
res = LibLZMA.lzma_easy_encoder(stream.to_ptr,
|
306
|
+
level,
|
307
|
+
LibLZMA.const_get(:"LZMA_CHECK_#{check.upcase}"))
|
227
308
|
|
228
309
|
LZMAError.raise_if_necessary(res)
|
229
310
|
|
@@ -235,9 +316,9 @@ module XZ
|
|
235
316
|
lzma_code(io, stream){|chunk| res << chunk}
|
236
317
|
end
|
237
318
|
|
238
|
-
LibLZMA.lzma_end(stream.
|
319
|
+
LibLZMA.lzma_end(stream.to_ptr)
|
239
320
|
|
240
|
-
block_given? ? stream
|
321
|
+
block_given? ? stream.total_out : res
|
241
322
|
end
|
242
323
|
alias encode_stream compress_stream
|
243
324
|
|
@@ -251,7 +332,7 @@ module XZ
|
|
251
332
|
# The path of the file to write to. If it exists, it will be
|
252
333
|
# overwritten.
|
253
334
|
#
|
254
|
-
# For the
|
335
|
+
# For the keyword parameters, see the ::compress_stream method.
|
255
336
|
#
|
256
337
|
# === Return value
|
257
338
|
#
|
@@ -259,17 +340,17 @@ module XZ
|
|
259
340
|
#
|
260
341
|
# === Example
|
261
342
|
#
|
262
|
-
# XZ.
|
263
|
-
# XZ.
|
343
|
+
# XZ.compress_file("myfile.txt", "myfile.txt.xz")
|
344
|
+
# XZ.compress_file("myarchive.tar", "myarchive.tar.xz")
|
264
345
|
#
|
265
346
|
# === Remarks
|
266
347
|
#
|
267
348
|
# This method is safe to use with big files, because files are not
|
268
349
|
# loaded into memory completely at once.
|
269
|
-
def compress_file(in_file, out_file,
|
350
|
+
def compress_file(in_file, out_file, **args)
|
270
351
|
File.open(in_file, "rb") do |i_file|
|
271
352
|
File.open(out_file, "wb") do |o_file|
|
272
|
-
compress_stream(i_file,
|
353
|
+
compress_stream(i_file, **args) do |chunk|
|
273
354
|
o_file.write(chunk)
|
274
355
|
end
|
275
356
|
end
|
@@ -282,7 +363,7 @@ module XZ
|
|
282
363
|
#
|
283
364
|
# [str] The data to compress.
|
284
365
|
#
|
285
|
-
# For the
|
366
|
+
# For the keyword parameters, see the #compress_stream method.
|
286
367
|
#
|
287
368
|
# === Return value
|
288
369
|
#
|
@@ -297,10 +378,9 @@ module XZ
|
|
297
378
|
#
|
298
379
|
# Don't use this method for big amounts of data--you may run out
|
299
380
|
# of memory. Use compress_file or compress_stream instead.
|
300
|
-
def compress(str,
|
301
|
-
raise(NotImplementedError, "StringIO isn't available!") unless defined? StringIO
|
381
|
+
def compress(str, **args)
|
302
382
|
s = StringIO.new(str)
|
303
|
-
compress_stream(s,
|
383
|
+
compress_stream(s, **args)
|
304
384
|
end
|
305
385
|
|
306
386
|
# Decompresses data in XZ format.
|
@@ -309,7 +389,7 @@ module XZ
|
|
309
389
|
#
|
310
390
|
# [str] The data to decompress.
|
311
391
|
#
|
312
|
-
# For the
|
392
|
+
# For the keyword parameters, see the decompress_stream method.
|
313
393
|
#
|
314
394
|
# === Return value
|
315
395
|
#
|
@@ -324,10 +404,12 @@ module XZ
|
|
324
404
|
#
|
325
405
|
# Don't use this method for big amounts of data--you may run out
|
326
406
|
# of memory. Use decompress_file or decompress_stream instead.
|
327
|
-
|
328
|
-
|
407
|
+
#
|
408
|
+
# Read #decompress_stream's Remarks section for notes on the
|
409
|
+
# return value's encoding.
|
410
|
+
def decompress(str, **args)
|
329
411
|
s = StringIO.new(str)
|
330
|
-
decompress_stream(s,
|
412
|
+
decompress_stream(s, **args)
|
331
413
|
end
|
332
414
|
|
333
415
|
# Decompresses +in_file+ and writes the result to +out_file+.
|
@@ -340,7 +422,7 @@ module XZ
|
|
340
422
|
# The path of the file to write to. If it exists, it will
|
341
423
|
# be overwritten.
|
342
424
|
#
|
343
|
-
# For the
|
425
|
+
# For the keyword parameters, see the decompress_stream method.
|
344
426
|
#
|
345
427
|
# === Return value
|
346
428
|
#
|
@@ -349,17 +431,17 @@ module XZ
|
|
349
431
|
#
|
350
432
|
# === Example
|
351
433
|
#
|
352
|
-
# XZ.
|
353
|
-
# XZ.
|
434
|
+
# XZ.decompress_file("myfile.txt.xz", "myfile.txt")
|
435
|
+
# XZ.decompress_file("myarchive.tar.xz", "myarchive.tar")
|
354
436
|
#
|
355
437
|
# === Remarks
|
356
438
|
#
|
357
439
|
# This method is safe to use with big files, because files are not
|
358
440
|
# loaded into memory completely at once.
|
359
|
-
def decompress_file(in_file, out_file,
|
441
|
+
def decompress_file(in_file, out_file, **args)
|
360
442
|
File.open(in_file, "rb") do |i_file|
|
361
443
|
File.open(out_file, "wb") do |o_file|
|
362
|
-
decompress_stream(i_file,
|
444
|
+
decompress_stream(i_file, internal_encoding: nil, external_encoding: Encoding::BINARY, **args) do |chunk|
|
363
445
|
o_file.write(chunk)
|
364
446
|
end
|
365
447
|
end
|
@@ -368,30 +450,23 @@ module XZ
|
|
368
450
|
|
369
451
|
private
|
370
452
|
|
371
|
-
# This method returns the size of +str+ in bytes.
|
372
|
-
def binary_size(str)
|
373
|
-
# Believe it or not, but this is faster than str.bytes.to_a.size.
|
374
|
-
# I benchmarked it, and it is as twice as fast.
|
375
|
-
str.dup.force_encoding(Encoding::BINARY).size
|
376
|
-
end
|
377
|
-
|
378
453
|
# This method does the heavy work of (de-)compressing a stream. It
|
379
454
|
# takes an IO object to read data from (that means the IO must be
|
380
|
-
# opened for reading) and a XZ::LZMAStream object that is used to
|
455
|
+
# opened for reading) and a XZ::LibLZMA::LZMAStream object that is used to
|
381
456
|
# (de-)compress the data. Furthermore this method takes a block
|
382
457
|
# which gets passed the (de-)compressed data in chunks one at a
|
383
458
|
# time--this is needed to allow (de-)compressing of very large
|
384
459
|
# files that can't be loaded fully into memory.
|
385
460
|
def lzma_code(io, stream)
|
386
|
-
input_buffer_p =
|
387
|
-
output_buffer_p =
|
461
|
+
input_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
|
462
|
+
output_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
|
388
463
|
|
389
464
|
while str = io.read(CHUNK_SIZE)
|
390
|
-
input_buffer_p.
|
465
|
+
input_buffer_p[0, str.bytesize] = str
|
391
466
|
|
392
467
|
# Set the data for compressing
|
393
|
-
stream
|
394
|
-
stream
|
468
|
+
stream.next_in = input_buffer_p
|
469
|
+
stream.avail_in = str.bytesize
|
395
470
|
|
396
471
|
# Now loop until we gathered all the data in
|
397
472
|
# stream[:next_out]. Depending on the amount of data, this may
|
@@ -405,25 +480,26 @@ module XZ
|
|
405
480
|
# the amount of data to compress is small).
|
406
481
|
loop do
|
407
482
|
# Prepare for getting the compressed_data
|
408
|
-
stream
|
409
|
-
stream
|
483
|
+
stream.next_out = output_buffer_p
|
484
|
+
stream.avail_out = CHUNK_SIZE
|
410
485
|
|
411
486
|
# Compress the data
|
412
487
|
res = if io.eof?
|
413
|
-
LibLZMA.lzma_code(stream.
|
488
|
+
LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_FINISH)
|
414
489
|
else
|
415
|
-
LibLZMA.lzma_code(stream.
|
490
|
+
LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_RUN)
|
416
491
|
end
|
417
492
|
check_lzma_code_retval(res)
|
418
493
|
|
419
494
|
# Write the compressed data
|
420
|
-
|
495
|
+
# Note: avail_out gives how much space is left after the new data
|
496
|
+
data = output_buffer_p[0, CHUNK_SIZE - stream.avail_out]
|
421
497
|
yield(data)
|
422
498
|
|
423
499
|
# If the buffer is completely filled, it's likely that there
|
424
500
|
# is more data liblzma wants to hand to us. Start a new
|
425
501
|
# iteration, but don't provide new input data.
|
426
|
-
break unless stream
|
502
|
+
break unless stream.avail_out == 0
|
427
503
|
end #loop
|
428
504
|
end #while
|
429
505
|
end #lzma_code
|
@@ -432,11 +508,10 @@ module XZ
|
|
432
508
|
# return value of the lzma_code() function and shows them if
|
433
509
|
# necessary.
|
434
510
|
def check_lzma_code_retval(code)
|
435
|
-
e = LibLZMA::LZMA_RET
|
436
511
|
case code
|
437
|
-
when
|
438
|
-
when
|
439
|
-
when
|
512
|
+
when LibLZMA::LZMA_NO_CHECK then warn("Couldn't verify archive integrity--archive has no integrity checksum.")
|
513
|
+
when LibLZMA::LZMA_UNSUPPORTED_CHECK then warn("Couldn't verify archive integrity--archive has an unsupported integrity checksum.")
|
514
|
+
when LibLZMA::LZMA_GET_CHECK then nil # This isn't useful. It indicates that the checksum type is now known.
|
440
515
|
else
|
441
516
|
LZMAError.raise_if_necessary(code)
|
442
517
|
end
|
@@ -447,6 +522,7 @@ module XZ
|
|
447
522
|
end
|
448
523
|
|
449
524
|
require_relative "xz/version"
|
525
|
+
require_relative "xz/fiddle_helper"
|
450
526
|
require_relative "xz/lib_lzma"
|
451
527
|
require_relative "xz/stream"
|
452
528
|
require_relative "xz/stream_writer"
|