ruby-xz 0.2.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/AUTHORS +1 -3
- data/HISTORY.rdoc +45 -0
- data/LICENSE +21 -0
- data/README.md +44 -43
- data/lib/xz.rb +163 -87
- data/lib/xz/fiddle_helper.rb +91 -0
- data/lib/xz/lib_lzma.rb +117 -103
- data/lib/xz/stream.rb +429 -32
- data/lib/xz/stream_reader.rb +221 -400
- data/lib/xz/stream_writer.rb +173 -314
- data/lib/xz/version.rb +4 -4
- metadata +17 -43
- data/COPYING +0 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 820fac81115af7c562998d1c80cce241d045c210
|
4
|
+
data.tar.gz: 31823023e5de3c452f8d0b82b643732ff0294658
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35aa3ebe08c3e82b4cc81339c23af1d78073ffb4d174f56079808f482ae60061c329aeed2fcbec56eb34e81102ed30bb3f32240f85436d5b99142df48e925f29
|
7
|
+
data.tar.gz: a83b6fc3ca7b397af5695877fde63ff8642fa60a689d36b505cbf08717ea2b4c22b182e484bd55aa66a49f90fb132da17e7cd7a2caf1250175590127fb7f0565
|
data/AUTHORS
CHANGED
@@ -2,7 +2,5 @@
|
|
2
2
|
|
3
3
|
All the people who worked on this project, in alphabetical order.
|
4
4
|
|
5
|
-
*
|
6
|
-
* Marvin Gülker (Quintus) <quintus ät quintilianus döt eu>
|
5
|
+
* Marvin Gülker (Quintus) <m-guelker@phoenixmail.de>
|
7
6
|
* Christoph Plank (chrisistuff)
|
8
|
-
* Nana Sakisaka (saki7)
|
data/HISTORY.rdoc
CHANGED
@@ -1,5 +1,50 @@
|
|
1
1
|
= Version history
|
2
2
|
|
3
|
+
== 1.0.0 (2018-05-20)
|
4
|
+
|
5
|
+
* *BreakingChange* The XZ module's methods now take any parameters
|
6
|
+
beyond the IO object as real Ruby keyword arguments rather than
|
7
|
+
a long argument list.
|
8
|
+
* *BreakingChange* XZ.decompress_stream now honours Ruby's
|
9
|
+
external and internal encoding concept instead of just
|
10
|
+
returning BINARY-tagged strings.
|
11
|
+
* *BreakingChange* Remove deprecated API on stream reader/writer
|
12
|
+
class and instead sync the API with Ruby's zlib library
|
13
|
+
(Ticket #12 by me).
|
14
|
+
* *BreakingChange* StreamWriter.new and StreamReader.new do not accept
|
15
|
+
a block anymore. This is part of syncing with Ruby's zlib API.
|
16
|
+
* *BreakingChange* StreamReader.open and StreamWriter.open always
|
17
|
+
return the new instance, even if a block is given to the method
|
18
|
+
(previous behaviour was to return the return value of the block).
|
19
|
+
This is part of the syncing with Ruby's zlib API.
|
20
|
+
* *BreakingChange* StreamReader.new and StreamWriter.new as well as
|
21
|
+
the ::open variants take additional arguments as real Ruby keyword
|
22
|
+
arguments now instead of a long parameter list plus options hash.
|
23
|
+
This is different from Ruby's own zlib API as that one takes both
|
24
|
+
a long parameter list and a hash of additional options. ruby-xz
|
25
|
+
is meant to follow zlib's semantics mostly, but not as a drop-in
|
26
|
+
replacement, so this divergence from zlib's API is okay (also
|
27
|
+
given that it isn't possible to replicate all possible options
|
28
|
+
1:1 anyway, since liblzma simply accepts different options than
|
29
|
+
libz). If you've never used these methods' optional arguments,
|
30
|
+
you should be fine.
|
31
|
+
* *BreakingChange* Stream#close now returns nil instead of the
|
32
|
+
number of bytes written. This syncs Stream#close with Ruby's
|
33
|
+
own IO#close, which also returns nil.
|
34
|
+
* *BreakingChange* Remove Stream#pos=, Stream#seek, Stream#stat. These
|
35
|
+
methods irritated the minitar gem, which doesn't expect them to
|
36
|
+
raise NotImplementedError, but directly to be missing if the object
|
37
|
+
does not support seeking.
|
38
|
+
* *BreakingChange* StreamReader and StreamWriter now honour Ruby's
|
39
|
+
encoding system instead of returning only BINARY-tagged strings.
|
40
|
+
* *Dependency* Remove dependency on ffi. ruby-xz now uses fiddle from
|
41
|
+
the stdlib instead.
|
42
|
+
* *Dependency* Remove dependency on io-like. ruby-xz now implements
|
43
|
+
all the IO mechanics itself. (Ticket #10 by me)
|
44
|
+
* *Dependency* Bump required Ruby version to 2.3.0.
|
45
|
+
* *Fix* liblzma.dylib not being found on OS X (Ticket #15 by
|
46
|
+
s0nspark).
|
47
|
+
|
3
48
|
== 0.2.3 (2015-12-29)
|
4
49
|
|
5
50
|
* *Fix* documentation of XZ module (a :nodoc: was causing havoc
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright © 2011-2018 Marvin Gülker et al.
|
2
|
+
|
3
|
+
See AUTHORS for the full list of contributors.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a
|
6
|
+
copy of this software and associated documentation files (the ‘Software’),
|
7
|
+
to deal in the Software without restriction, including without limitation
|
8
|
+
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
9
|
+
and/or sell copies of the Software, and to permit persons to whom the Software
|
10
|
+
is furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED ‘AS IS’, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
CHANGED
@@ -5,16 +5,20 @@ ruby-xz
|
|
5
5
|
best known for the extreme compression-ratio its native *XZ* format
|
6
6
|
achieves. ruby-xz gives you the possibility of creating and extracting
|
7
7
|
XZ archives on any platform where liblzma is installed. No compilation
|
8
|
-
is needed, because ruby-xz is written
|
9
|
-
|
8
|
+
is needed, because ruby-xz is written on top of Ruby's “fiddle” library
|
9
|
+
(part of the standard library). ruby-xz does not have any dependencies
|
10
|
+
other than Ruby itself.
|
10
11
|
|
11
|
-
ruby-xz supports both
|
12
|
+
ruby-xz supports both “intuitive” (de)compression by providing methods to
|
12
13
|
directly operate on strings and files, but also allows you to operate
|
13
14
|
directly on IO streams (see the various methods of the XZ module). On top
|
14
15
|
of that, ruby-xz offers an advanced interface that allows you to treat
|
15
16
|
XZ-compressed data as IO streams, both for reading and for writing. See the
|
16
17
|
XZ::StreamReader and XZ::StreamWriter classes for more information on this.
|
17
18
|
|
19
|
+
**Note**: Version 1.0.0 breaks the API quite heavily. Refer to
|
20
|
+
HISTORY.rdoc for details.
|
21
|
+
|
18
22
|
Installation
|
19
23
|
------------
|
20
24
|
|
@@ -28,7 +32,7 @@ Alternatively, you can clone the repository and build the most recent
|
|
28
32
|
code yourself:
|
29
33
|
|
30
34
|
```
|
31
|
-
$ git clone git://
|
35
|
+
$ git clone git://git.guelker.eu/ruby-xz.git
|
32
36
|
$ cd ruby-xz
|
33
37
|
$ rake gem
|
34
38
|
$ gem install pkg/ruby-xz-*.gem
|
@@ -42,26 +46,23 @@ everything you need to use ruby-xz. As said, it's not big, but powerful:
|
|
42
46
|
You can create and extract whole archive files, compress or decompress
|
43
47
|
streams of data or just plain strings.
|
44
48
|
|
45
|
-
You can read the documentation on your local gemserver, or browse it [online][
|
49
|
+
You can read the documentation on your local gemserver, or browse it [online][2].
|
46
50
|
|
47
|
-
###
|
51
|
+
### Require ###
|
48
52
|
|
49
|
-
You have to require
|
50
|
-
"xz.rb", so do
|
53
|
+
You have to require the “xz.rb” file:
|
51
54
|
|
52
55
|
``` ruby
|
53
56
|
require "xz"
|
54
57
|
```
|
55
58
|
|
56
|
-
to get it.
|
57
|
-
|
58
59
|
### Examples ###
|
59
60
|
|
60
61
|
``` ruby
|
61
|
-
# Compress a
|
62
|
-
XZ.compress_file("myfile.
|
62
|
+
# Compress a file
|
63
|
+
XZ.compress_file("myfile.txt", "myfile.txt.xz")
|
63
64
|
# Decompress it
|
64
|
-
XZ.decompress_file("myfile.
|
65
|
+
XZ.decompress_file("myfile.txt.xz", "myfile.txt")
|
65
66
|
|
66
67
|
# Compress everything you get from a socket (note that there HAS to be an EOF
|
67
68
|
# sometime, otherwise this will run infinitely)
|
@@ -76,42 +77,42 @@ data = XZ.decompress(comp)
|
|
76
77
|
Have a look at the XZ module's documentation for an in-depth description of
|
77
78
|
what is possible.
|
78
79
|
|
79
|
-
|
80
|
-
-----
|
81
|
-
|
82
|
-
* Code repository: https://github.com/Quintus/ruby-xz
|
83
|
-
* Issue tracker: https://github.com/Quintus/ruby-xz/issues
|
84
|
-
* Online documentation: http://quintus.github.io/ruby-xz
|
80
|
+
### Usage with the minitar gem ###
|
85
81
|
|
86
|
-
|
87
|
-
|
82
|
+
ruby-xz can be used together with the [minitar][3] library (formerly
|
83
|
+
“archive-tar-minitar”) to create XZ-compressed tarballs. This works by
|
84
|
+
employing the IO-like classes XZ::StreamReader and XZ::StreamWriter
|
85
|
+
analogous to how one would use Ruby's “zlib” library together with
|
86
|
+
“minitar”. Example:
|
88
87
|
|
89
|
-
|
88
|
+
``` ruby
|
89
|
+
require "xz"
|
90
|
+
require "minitar"
|
90
91
|
|
91
|
-
|
92
|
+
# Create an XZ-compressed tarball
|
93
|
+
XZ::StreamWriter.open("tarball.tar.xz") do |txz|
|
94
|
+
Minitar.pack("path/to/directory", txz)
|
95
|
+
end
|
92
96
|
|
93
|
-
|
97
|
+
# Unpack it again
|
98
|
+
XZ::StreamReader.open("tarball.tar.xz") do |txz|
|
99
|
+
Minitar.unpack(txz, "path/to/target/directory")
|
100
|
+
end
|
101
|
+
```
|
94
102
|
|
95
|
-
|
103
|
+
Links
|
104
|
+
-----
|
96
105
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
and/or sell copies of the Software, and to permit persons to whom the Software
|
102
|
-
is furnished to do so, subject to the following conditions:
|
106
|
+
* Website: https://mg.guelker.eu/projects/ruby-xz/
|
107
|
+
* Online documentation: https://mg.guelker.eu/projects/ruby-xz/doc
|
108
|
+
* Code repository: https://git.guelker.eu/?p=ruby-xz.git;a=summary
|
109
|
+
* Issue tracker: https://github.com/Quintus/ruby-xz/issues
|
103
110
|
|
104
|
-
|
105
|
-
|
111
|
+
License
|
112
|
+
-------
|
106
113
|
|
107
|
-
|
108
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
109
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
110
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
111
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
112
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
113
|
-
THE SOFTWARE.
|
114
|
+
MIT license; see LICENSE for the full license text.
|
114
115
|
|
115
|
-
[1]:
|
116
|
-
[2]: https://
|
117
|
-
[3]:
|
116
|
+
[1]: https://tukaani.org/xz/
|
117
|
+
[2]: https://mg.guelker.eu/projects/ruby-xz/doc
|
118
|
+
[3]: https://github.com/halostatue/minitar
|
data/lib/xz.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
#--
|
3
|
-
# (The MIT License)
|
4
|
-
#
|
5
3
|
# Basic liblzma-bindings for Ruby.
|
6
4
|
#
|
7
|
-
# Copyright © 2011
|
8
|
-
#
|
5
|
+
# Copyright © 2011-2018 Marvin Gülker et al.
|
6
|
+
#
|
7
|
+
# See AUTHORS for the full list of contributors.
|
9
8
|
#
|
10
9
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
11
10
|
# copy of this software and associated documentation files (the ‘Software’),
|
@@ -27,20 +26,14 @@
|
|
27
26
|
#++
|
28
27
|
|
29
28
|
require "pathname"
|
30
|
-
require "
|
31
|
-
require
|
32
|
-
require "
|
29
|
+
require "fiddle"
|
30
|
+
require "fiddle/import"
|
31
|
+
require "stringio"
|
32
|
+
require "forwardable"
|
33
33
|
|
34
34
|
# The namespace and main module of this library. Each method of this
|
35
35
|
# module may raise exceptions of class XZ::LZMAError, which is not
|
36
36
|
# named in the methods' documentations anymore.
|
37
|
-
#
|
38
|
-
# All strings you receive from any method defined in this module and
|
39
|
-
# the classes defined in it are encoded in BINARY, so you may have to
|
40
|
-
# call #force_encoding on them to tag them with the correct encoding
|
41
|
-
# (assuming you _know_ what their correct encoding should be).
|
42
|
-
# ruby-xz can’t handle this as compiled strings don’t come with
|
43
|
-
# encoding information.
|
44
37
|
module XZ
|
45
38
|
|
46
39
|
# Number of bytes read in one chunk.
|
@@ -68,17 +61,24 @@ module XZ
|
|
68
61
|
end
|
69
62
|
|
70
63
|
# call-seq:
|
71
|
-
# decompress_stream(io [,
|
72
|
-
# decompress_stream(io [,
|
73
|
-
# decode_stream(io [,
|
74
|
-
# decode_stream(io [,
|
64
|
+
# decompress_stream(io [, kw ] ) → a_string
|
65
|
+
# decompress_stream(io [, kw ] ){|chunk| ... } → an_integer
|
66
|
+
# decode_stream(io [, kw ] ) → a_string
|
67
|
+
# decode_stream(io [, kw ] ){|chunk| ... } → an_integer
|
75
68
|
#
|
76
69
|
# Decompresses a stream containing XZ-compressed data.
|
77
70
|
#
|
78
71
|
# === Parameters
|
72
|
+
# ==== Positional parameters
|
79
73
|
#
|
80
74
|
# [io]
|
81
|
-
# The IO to read from. It must be opened for reading
|
75
|
+
# The IO to read from. It must be opened for reading in
|
76
|
+
# binary mode.
|
77
|
+
# [chunk (Block argument)]
|
78
|
+
# One piece of decompressed data. See Remarks section below
|
79
|
+
# for information about its encoding.
|
80
|
+
#
|
81
|
+
# ==== Keyword arguments
|
82
82
|
#
|
83
83
|
# [memory_limit (+UINT64_MAX+)]
|
84
84
|
# If not XZ::LibLZMA::UINT64_MAX, makes liblzma
|
@@ -96,9 +96,13 @@ module XZ
|
|
96
96
|
# has an unsupported checksum type.
|
97
97
|
# [:concatenated]
|
98
98
|
# Decompress concatenated archives.
|
99
|
-
#
|
100
|
-
#
|
101
|
-
#
|
99
|
+
# [external_encoding (Encoding.default_external)]
|
100
|
+
# Assume the decompressed data inside the compressed data
|
101
|
+
# has this encoding. See Remarks section.
|
102
|
+
# [internal_encoding (Encoding.default_internal)]
|
103
|
+
# Request transcoding of the decompressed data into this
|
104
|
+
# encoding if not nil. Note that Encoding.default_internal
|
105
|
+
# is nil by default. See Remarks section.
|
102
106
|
#
|
103
107
|
# === Return value
|
104
108
|
#
|
@@ -106,6 +110,18 @@ module XZ
|
|
106
110
|
# written. Otherwise, returns the decompressed data as a
|
107
111
|
# string (see the Remarks section for its encoding).
|
108
112
|
#
|
113
|
+
# === Raises
|
114
|
+
#
|
115
|
+
# [Encoding::InvalidByteSequenceError]
|
116
|
+
# 1. You requested an “internal encoding” conversion
|
117
|
+
# and the archive contains invalid byte sequences
|
118
|
+
# in the external encoding.
|
119
|
+
# 2. You requested an “internal encoding” conversion, used
|
120
|
+
# the block form of this method, and liblzma decided
|
121
|
+
# to cut the decompressed data into chunks in the middle of
|
122
|
+
# a multibyte character. See Remarks section for an
|
123
|
+
# explanation.
|
124
|
+
#
|
109
125
|
# === Example
|
110
126
|
#
|
111
127
|
# data = File.open("archive.xz", "rb"){|f| f.read}
|
@@ -125,8 +141,52 @@ module XZ
|
|
125
141
|
# know how big your data gets or if you want to decompress much
|
126
142
|
# data, use the block form. Of course you shouldn't store the data
|
127
143
|
# you read in RAM then as in the example above.
|
128
|
-
|
144
|
+
#
|
145
|
+
# This method honours Ruby's external and internal encoding concept.
|
146
|
+
# All documentation about this applies to this method, with the
|
147
|
+
# exception that the external encoding does not refer to the data
|
148
|
+
# on the hard disk (that's compressed XZ data, it's always binary),
|
149
|
+
# but to the data inside the XZ container, i.e. to the *decompressed*
|
150
|
+
# data. Any strings you receive from this method (regardless of
|
151
|
+
# whether via return value or via the +chunk+ block argument) will
|
152
|
+
# first be tagged with the external encoding. If you set an internal
|
153
|
+
# encoding (either via the +internal_encoding+ parameter or via
|
154
|
+
# Ruby's default internal encoding) that string will be transcoded
|
155
|
+
# from the external encoding to the internal encoding before you
|
156
|
+
# even see it; in that case, the return value or chunk block argument
|
157
|
+
# will be encoded in the internal encoding. Internal encoding is
|
158
|
+
# disabled in Ruby by default and the argument for this method also
|
159
|
+
# defaults to nil.
|
160
|
+
#
|
161
|
+
# Due to the external encoding being applied, it can happen that
|
162
|
+
# +chunk+ contains an incomplete multibyte character causing
|
163
|
+
# <tt>valid_encoding?</tt> to return false if called on +chunk+,
|
164
|
+
# because liblzma doesn't know about encodings. The rest of the
|
165
|
+
# character will be yielded to the block in the next iteration
|
166
|
+
# then as liblzma progresses with the decompression from the XZ
|
167
|
+
# format. In other words, be prepared that +chunk+ can contain
|
168
|
+
# incomplete multibyte chars.
|
169
|
+
#
|
170
|
+
# This can have nasty side effects if you requested an internal
|
171
|
+
# encoding automatic transcoding and used the block form. Since
|
172
|
+
# this method applies the internal encoding transcoding before the
|
173
|
+
# chunk is yielded to the block, String#encode gets the incomplete
|
174
|
+
# multibyte character. In that case, you will receive an
|
175
|
+
# Encoding::InvalidByteSequenceError exception even though your
|
176
|
+
# data is perfectly well-formed inside the XZ data. It's just
|
177
|
+
# that liblzma during decompression cut the chunks at an
|
178
|
+
# unfortunate place. To avoid this, do not request internal encoding
|
179
|
+
# conversion when using the block form, but instead transcode
|
180
|
+
# the data manually after you have decompressed the entire data.
|
181
|
+
def decompress_stream(io, memory_limit: LibLZMA::UINT64_MAX, flags: [:tell_unsupported_check], external_encoding: nil, internal_encoding: nil, &block)
|
129
182
|
raise(ArgumentError, "Invalid memory limit set!") unless memory_limit > 0 && memory_limit <= LibLZMA::UINT64_MAX
|
183
|
+
raise(ArgumentError, "external_encoding must be set if internal_encoding transcoding is requested") if internal_encoding && !external_encoding
|
184
|
+
|
185
|
+
# The ArgumentError above is only about the concrete arguments
|
186
|
+
# (to sync with Ruby's IO API), not about the implied internal
|
187
|
+
# encoding, which might still kick in (and does, see below).
|
188
|
+
external_encoding ||= Encoding.default_external
|
189
|
+
internal_encoding ||= Encoding.default_internal
|
130
190
|
|
131
191
|
# bit-or all flags
|
132
192
|
allflags = flags.inject(0) do |val, flag|
|
@@ -134,8 +194,9 @@ module XZ
|
|
134
194
|
val | flag
|
135
195
|
end
|
136
196
|
|
137
|
-
stream = LZMAStream.
|
138
|
-
|
197
|
+
stream = LibLZMA::LZMAStream.malloc
|
198
|
+
LibLZMA.LZMA_STREAM_INIT(stream)
|
199
|
+
res = LibLZMA.lzma_stream_decoder(stream.to_ptr,
|
139
200
|
memory_limit,
|
140
201
|
allflags)
|
141
202
|
|
@@ -144,32 +205,46 @@ module XZ
|
|
144
205
|
res = ""
|
145
206
|
res.encode!(Encoding::BINARY)
|
146
207
|
if block_given?
|
147
|
-
res = lzma_code(io, stream
|
208
|
+
res = lzma_code(io, stream) do |chunk|
|
209
|
+
chunk = chunk.dup # Do not write somewhere into the fiddle pointer while encoding (-> can segfault)
|
210
|
+
chunk.force_encoding(external_encoding) if external_encoding
|
211
|
+
chunk.encode!(internal_encoding) if internal_encoding
|
212
|
+
yield(chunk)
|
213
|
+
end
|
148
214
|
else
|
149
215
|
lzma_code(io, stream){|chunk| res << chunk}
|
216
|
+
res.force_encoding(external_encoding) if external_encoding
|
217
|
+
res.encode!(internal_encoding) if internal_encoding
|
150
218
|
end
|
151
219
|
|
152
|
-
LibLZMA.lzma_end(stream.
|
220
|
+
LibLZMA.lzma_end(stream.to_ptr)
|
153
221
|
|
154
|
-
block_given? ? stream
|
222
|
+
block_given? ? stream.total_out : res
|
155
223
|
end
|
156
224
|
alias decode_stream decompress_stream
|
157
225
|
|
158
226
|
# call-seq:
|
159
|
-
# compress_stream(io [,
|
160
|
-
# compress_stream(io [,
|
161
|
-
# encode_stream(io [,
|
162
|
-
# encode_stream(io [,
|
227
|
+
# compress_stream(io [, kw ] ) → a_string
|
228
|
+
# compress_stream(io [, kw ] ){|chunk| ... } → an_integer
|
229
|
+
# encode_stream(io [, kw ] ) → a_string
|
230
|
+
# encode_stream(io [, kw ] ){|chunk| ... } → an_integer
|
163
231
|
#
|
164
232
|
# Compresses a stream of data into XZ-compressed data.
|
165
233
|
#
|
166
234
|
# === Parameters
|
235
|
+
# ==== Positional arguments
|
167
236
|
#
|
168
237
|
# [io]
|
169
238
|
# The IO to read the data from. Must be opened for
|
170
239
|
# reading.
|
240
|
+
# [chunk (Block argument)]
|
241
|
+
# One piece of compressed data. This is always tagged
|
242
|
+
# as a BINARY string, since it's compressed binary data.
|
171
243
|
#
|
172
|
-
#
|
244
|
+
# ==== Keyword arguments
|
245
|
+
# All keyword arguments are optional.
|
246
|
+
#
|
247
|
+
# [level (6)]
|
173
248
|
# Compression strength. Higher values indicate a
|
174
249
|
# smaller result, but longer compression time. Maximum
|
175
250
|
# is 9.
|
@@ -187,9 +262,6 @@ module XZ
|
|
187
262
|
# compression. This may succeed, but you can end
|
188
263
|
# up with *very* long computation times.
|
189
264
|
#
|
190
|
-
# [chunk (Block argument)]
|
191
|
-
# One piece of compressed data.
|
192
|
-
#
|
193
265
|
# === Return value
|
194
266
|
#
|
195
267
|
# If a block was given, returns the number of bytes
|
@@ -204,7 +276,9 @@ module XZ
|
|
204
276
|
# i.rewind
|
205
277
|
# str = ""
|
206
278
|
#
|
207
|
-
# XZ.compress_stream(i, 4, :sha256)
|
279
|
+
# XZ.compress_stream(i, level: 4, check: :sha256) do |c|
|
280
|
+
# str << c
|
281
|
+
# end #=> 123
|
208
282
|
# str #=> Some binary blob
|
209
283
|
#
|
210
284
|
# === Remarks
|
@@ -214,16 +288,23 @@ module XZ
|
|
214
288
|
# know how big your data gets or if you want to compress much
|
215
289
|
# data, use the block form. Of course you shouldn't store the data
|
216
290
|
# you read in RAM then as in the example above.
|
217
|
-
|
218
|
-
|
291
|
+
#
|
292
|
+
# For the +io+ object passed Ruby's normal external and internal
|
293
|
+
# encoding rules apply while it is read from by this method. These
|
294
|
+
# encodings are not changed on +io+ by this method. The data you
|
295
|
+
# receive in the block (+chunk+) above is binary data (compressed
|
296
|
+
# data) and as such encoded as BINARY.
|
297
|
+
def compress_stream(io, level: 6, check: :crc64, extreme: false, &block)
|
298
|
+
raise(ArgumentError, "Invalid compression level!") unless (0..9).include?(level)
|
219
299
|
raise(ArgumentError, "Invalid checksum specified!") unless [:none, :crc32, :crc64, :sha256].include?(check)
|
220
300
|
|
221
|
-
|
301
|
+
level |= LibLZMA::LZMA_PRESET_EXTREME if extreme
|
222
302
|
|
223
|
-
stream = LZMAStream.
|
224
|
-
|
225
|
-
|
226
|
-
|
303
|
+
stream = LibLZMA::LZMAStream.malloc
|
304
|
+
LibLZMA::LZMA_STREAM_INIT(stream)
|
305
|
+
res = LibLZMA.lzma_easy_encoder(stream.to_ptr,
|
306
|
+
level,
|
307
|
+
LibLZMA.const_get(:"LZMA_CHECK_#{check.upcase}"))
|
227
308
|
|
228
309
|
LZMAError.raise_if_necessary(res)
|
229
310
|
|
@@ -235,9 +316,9 @@ module XZ
|
|
235
316
|
lzma_code(io, stream){|chunk| res << chunk}
|
236
317
|
end
|
237
318
|
|
238
|
-
LibLZMA.lzma_end(stream.
|
319
|
+
LibLZMA.lzma_end(stream.to_ptr)
|
239
320
|
|
240
|
-
block_given? ? stream
|
321
|
+
block_given? ? stream.total_out : res
|
241
322
|
end
|
242
323
|
alias encode_stream compress_stream
|
243
324
|
|
@@ -251,7 +332,7 @@ module XZ
|
|
251
332
|
# The path of the file to write to. If it exists, it will be
|
252
333
|
# overwritten.
|
253
334
|
#
|
254
|
-
# For the
|
335
|
+
# For the keyword parameters, see the ::compress_stream method.
|
255
336
|
#
|
256
337
|
# === Return value
|
257
338
|
#
|
@@ -259,17 +340,17 @@ module XZ
|
|
259
340
|
#
|
260
341
|
# === Example
|
261
342
|
#
|
262
|
-
# XZ.
|
263
|
-
# XZ.
|
343
|
+
# XZ.compress_file("myfile.txt", "myfile.txt.xz")
|
344
|
+
# XZ.compress_file("myarchive.tar", "myarchive.tar.xz")
|
264
345
|
#
|
265
346
|
# === Remarks
|
266
347
|
#
|
267
348
|
# This method is safe to use with big files, because files are not
|
268
349
|
# loaded into memory completely at once.
|
269
|
-
def compress_file(in_file, out_file,
|
350
|
+
def compress_file(in_file, out_file, **args)
|
270
351
|
File.open(in_file, "rb") do |i_file|
|
271
352
|
File.open(out_file, "wb") do |o_file|
|
272
|
-
compress_stream(i_file,
|
353
|
+
compress_stream(i_file, **args) do |chunk|
|
273
354
|
o_file.write(chunk)
|
274
355
|
end
|
275
356
|
end
|
@@ -282,7 +363,7 @@ module XZ
|
|
282
363
|
#
|
283
364
|
# [str] The data to compress.
|
284
365
|
#
|
285
|
-
# For the
|
366
|
+
# For the keyword parameters, see the #compress_stream method.
|
286
367
|
#
|
287
368
|
# === Return value
|
288
369
|
#
|
@@ -297,10 +378,9 @@ module XZ
|
|
297
378
|
#
|
298
379
|
# Don't use this method for big amounts of data--you may run out
|
299
380
|
# of memory. Use compress_file or compress_stream instead.
|
300
|
-
def compress(str,
|
301
|
-
raise(NotImplementedError, "StringIO isn't available!") unless defined? StringIO
|
381
|
+
def compress(str, **args)
|
302
382
|
s = StringIO.new(str)
|
303
|
-
compress_stream(s,
|
383
|
+
compress_stream(s, **args)
|
304
384
|
end
|
305
385
|
|
306
386
|
# Decompresses data in XZ format.
|
@@ -309,7 +389,7 @@ module XZ
|
|
309
389
|
#
|
310
390
|
# [str] The data to decompress.
|
311
391
|
#
|
312
|
-
# For the
|
392
|
+
# For the keyword parameters, see the decompress_stream method.
|
313
393
|
#
|
314
394
|
# === Return value
|
315
395
|
#
|
@@ -324,10 +404,12 @@ module XZ
|
|
324
404
|
#
|
325
405
|
# Don't use this method for big amounts of data--you may run out
|
326
406
|
# of memory. Use decompress_file or decompress_stream instead.
|
327
|
-
|
328
|
-
|
407
|
+
#
|
408
|
+
# Read #decompress_stream's Remarks section for notes on the
|
409
|
+
# return value's encoding.
|
410
|
+
def decompress(str, **args)
|
329
411
|
s = StringIO.new(str)
|
330
|
-
decompress_stream(s,
|
412
|
+
decompress_stream(s, **args)
|
331
413
|
end
|
332
414
|
|
333
415
|
# Decompresses +in_file+ and writes the result to +out_file+.
|
@@ -340,7 +422,7 @@ module XZ
|
|
340
422
|
# The path of the file to write to. If it exists, it will
|
341
423
|
# be overwritten.
|
342
424
|
#
|
343
|
-
# For the
|
425
|
+
# For the keyword parameters, see the decompress_stream method.
|
344
426
|
#
|
345
427
|
# === Return value
|
346
428
|
#
|
@@ -349,17 +431,17 @@ module XZ
|
|
349
431
|
#
|
350
432
|
# === Example
|
351
433
|
#
|
352
|
-
# XZ.
|
353
|
-
# XZ.
|
434
|
+
# XZ.decompress_file("myfile.txt.xz", "myfile.txt")
|
435
|
+
# XZ.decompress_file("myarchive.tar.xz", "myarchive.tar")
|
354
436
|
#
|
355
437
|
# === Remarks
|
356
438
|
#
|
357
439
|
# This method is safe to use with big files, because files are not
|
358
440
|
# loaded into memory completely at once.
|
359
|
-
def decompress_file(in_file, out_file,
|
441
|
+
def decompress_file(in_file, out_file, **args)
|
360
442
|
File.open(in_file, "rb") do |i_file|
|
361
443
|
File.open(out_file, "wb") do |o_file|
|
362
|
-
decompress_stream(i_file,
|
444
|
+
decompress_stream(i_file, internal_encoding: nil, external_encoding: Encoding::BINARY, **args) do |chunk|
|
363
445
|
o_file.write(chunk)
|
364
446
|
end
|
365
447
|
end
|
@@ -368,30 +450,23 @@ module XZ
|
|
368
450
|
|
369
451
|
private
|
370
452
|
|
371
|
-
# This method returns the size of +str+ in bytes.
|
372
|
-
def binary_size(str)
|
373
|
-
# Believe it or not, but this is faster than str.bytes.to_a.size.
|
374
|
-
# I benchmarked it, and it is as twice as fast.
|
375
|
-
str.dup.force_encoding(Encoding::BINARY).size
|
376
|
-
end
|
377
|
-
|
378
453
|
# This method does the heavy work of (de-)compressing a stream. It
|
379
454
|
# takes an IO object to read data from (that means the IO must be
|
380
|
-
# opened for reading) and a XZ::LZMAStream object that is used to
|
455
|
+
# opened for reading) and a XZ::LibLZMA::LZMAStream object that is used to
|
381
456
|
# (de-)compress the data. Furthermore this method takes a block
|
382
457
|
# which gets passed the (de-)compressed data in chunks one at a
|
383
458
|
# time--this is needed to allow (de-)compressing of very large
|
384
459
|
# files that can't be loaded fully into memory.
|
385
460
|
def lzma_code(io, stream)
|
386
|
-
input_buffer_p =
|
387
|
-
output_buffer_p =
|
461
|
+
input_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
|
462
|
+
output_buffer_p = Fiddle::Pointer.malloc(CHUNK_SIZE) # automatically freed by fiddle on GC
|
388
463
|
|
389
464
|
while str = io.read(CHUNK_SIZE)
|
390
|
-
input_buffer_p.
|
465
|
+
input_buffer_p[0, str.bytesize] = str
|
391
466
|
|
392
467
|
# Set the data for compressing
|
393
|
-
stream
|
394
|
-
stream
|
468
|
+
stream.next_in = input_buffer_p
|
469
|
+
stream.avail_in = str.bytesize
|
395
470
|
|
396
471
|
# Now loop until we gathered all the data in
|
397
472
|
# stream[:next_out]. Depending on the amount of data, this may
|
@@ -405,25 +480,26 @@ module XZ
|
|
405
480
|
# the amount of data to compress is small).
|
406
481
|
loop do
|
407
482
|
# Prepare for getting the compressed_data
|
408
|
-
stream
|
409
|
-
stream
|
483
|
+
stream.next_out = output_buffer_p
|
484
|
+
stream.avail_out = CHUNK_SIZE
|
410
485
|
|
411
486
|
# Compress the data
|
412
487
|
res = if io.eof?
|
413
|
-
LibLZMA.lzma_code(stream.
|
488
|
+
LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_FINISH)
|
414
489
|
else
|
415
|
-
LibLZMA.lzma_code(stream.
|
490
|
+
LibLZMA.lzma_code(stream.to_ptr, LibLZMA::LZMA_RUN)
|
416
491
|
end
|
417
492
|
check_lzma_code_retval(res)
|
418
493
|
|
419
494
|
# Write the compressed data
|
420
|
-
|
495
|
+
# Note: avail_out gives how much space is left after the new data
|
496
|
+
data = output_buffer_p[0, CHUNK_SIZE - stream.avail_out]
|
421
497
|
yield(data)
|
422
498
|
|
423
499
|
# If the buffer is completely filled, it's likely that there
|
424
500
|
# is more data liblzma wants to hand to us. Start a new
|
425
501
|
# iteration, but don't provide new input data.
|
426
|
-
break unless stream
|
502
|
+
break unless stream.avail_out == 0
|
427
503
|
end #loop
|
428
504
|
end #while
|
429
505
|
end #lzma_code
|
@@ -432,11 +508,10 @@ module XZ
|
|
432
508
|
# return value of the lzma_code() function and shows them if
|
433
509
|
# necessary.
|
434
510
|
def check_lzma_code_retval(code)
|
435
|
-
e = LibLZMA::LZMA_RET
|
436
511
|
case code
|
437
|
-
when
|
438
|
-
when
|
439
|
-
when
|
512
|
+
when LibLZMA::LZMA_NO_CHECK then warn("Couldn't verify archive integrity--archive has no integrity checksum.")
|
513
|
+
when LibLZMA::LZMA_UNSUPPORTED_CHECK then warn("Couldn't verify archive integrity--archive has an unsupported integrity checksum.")
|
514
|
+
when LibLZMA::LZMA_GET_CHECK then nil # This isn't useful. It indicates that the checksum type is now known.
|
440
515
|
else
|
441
516
|
LZMAError.raise_if_necessary(code)
|
442
517
|
end
|
@@ -447,6 +522,7 @@ module XZ
|
|
447
522
|
end
|
448
523
|
|
449
524
|
require_relative "xz/version"
|
525
|
+
require_relative "xz/fiddle_helper"
|
450
526
|
require_relative "xz/lib_lzma"
|
451
527
|
require_relative "xz/stream"
|
452
528
|
require_relative "xz/stream_writer"
|