ruby-xz 0.2.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/AUTHORS +1 -3
- data/HISTORY.rdoc +45 -0
- data/LICENSE +21 -0
- data/README.md +44 -43
- data/lib/xz.rb +163 -87
- data/lib/xz/fiddle_helper.rb +91 -0
- data/lib/xz/lib_lzma.rb +117 -103
- data/lib/xz/stream.rb +429 -32
- data/lib/xz/stream_reader.rb +221 -400
- data/lib/xz/stream_writer.rb +173 -314
- data/lib/xz/version.rb +4 -4
- metadata +17 -43
- data/COPYING +0 -26
@@ -0,0 +1,91 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#--
|
3
|
+
# Basic liblzma-bindings for Ruby.
|
4
|
+
#
|
5
|
+
# Copyright © 2011-2018 Marvin Gülker et al.
|
6
|
+
#
|
7
|
+
# See AUTHORS for the full list of contributors.
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
10
|
+
# copy of this software and associated documentation files (the ‘Software’),
|
11
|
+
# to deal in the Software without restriction, including without limitation
|
12
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
13
|
+
# and/or sell copies of the Software, and to permit persons to whom the Software
|
14
|
+
# is furnished to do so, subject to the following conditions:
|
15
|
+
#
|
16
|
+
# The above copyright notice and this permission notice shall be included in all
|
17
|
+
# copies or substantial portions of the Software.
|
18
|
+
#
|
19
|
+
# THE SOFTWARE IS PROVIDED ‘AS IS’, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
20
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
21
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
22
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
23
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
24
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
25
|
+
# THE SOFTWARE.
|
26
|
+
#++
|
27
|
+
|
28
|
+
module XZ
|
29
|
+
|
30
|
+
# This is an internal API not meant for users of ruby-xz.
|
31
|
+
# This mixin module defines some helper functions on top
|
32
|
+
# of Fiddle's functionality.
|
33
|
+
module FiddleHelper # :nodoc:
|
34
|
+
|
35
|
+
# Define constants with numeric values as if this was a C enum
# definition. Pass the constant names as symbols, each optionally
# followed by an explicit integer value. A name without an explicit
# value receives the previously assigned value plus one; the first
# implicit value is zero.
#
# Example:
#
#   enum :FOO, :BAR, 5, :BAZ
#
# This defines a constant FOO with value 0, BAR with value 5, BAZ
# with value 6.
#
# Calling it without arguments defines nothing. Returns nil.
def enum(*args)
  next_value = 0 # First value of an enum is 0 in C

  args.each_with_index do |name, index|
    # Integers are explicit values that belong to the preceding name.
    next if name.respond_to?(:to_int)

    # For the last name this reads one past the end and yields nil,
    # which simply selects the implicit value.
    explicit = args[index + 1]
    value = explicit.respond_to?(:to_int) ? explicit.to_int : next_value

    const_set(name, value)
    next_value = value + 1
  end

  nil
end
|
70
|
+
|
71
|
+
# Attempts to dlload each of +names+ in the given order and stops at
# the first one that can be opened, returning that name. If every
# attempt fails with Fiddle::DLError, a Fiddle::DLError listing all
# names is raised.
def dlloadanyof(*names)
  names.each do |candidate|
    opened = begin
      dlload(candidate)
      true
    rescue Fiddle::DLError
      false # Try the next candidate
    end

    return candidate if opened
  end

  raise Fiddle::DLError, "Failed to open any of these shared object files: #{names.join(', ')}"
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
data/lib/xz/lib_lzma.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
#--
|
3
|
-
# The MIT License
|
4
|
-
#
|
5
3
|
# Basic liblzma-bindings for Ruby.
|
6
4
|
#
|
7
|
-
# Copyright © 2011
|
5
|
+
# Copyright © 2011-2018 Marvin Gülker et al.
|
6
|
+
#
|
7
|
+
# See AUTHORS for the full list of contributors.
|
8
8
|
#
|
9
9
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
10
10
|
# copy of this software and associated documentation files (the ‘Software’),
|
@@ -27,9 +27,31 @@
|
|
27
27
|
|
28
28
|
module XZ
|
29
29
|
|
30
|
-
# This module wraps functions and enums
|
30
|
+
# This module wraps functions and enums provided by liblzma.
|
31
|
+
# It contains the direct mapping to the underlying C functions;
|
32
|
+
# you should never have to use this. It's the low-level API
|
33
|
+
# the other methods provided by ruby-xz are based on.
|
31
34
|
module LibLZMA
|
32
|
-
extend
|
35
|
+
extend Fiddle::Importer
|
36
|
+
extend XZ::FiddleHelper
|
37
|
+
|
38
|
+
dlloadanyof 'liblzma.so.5', 'liblzma.so', 'liblzma.5.dylib', 'liblzma.dylib', 'liblzma'
|
39
|
+
|
40
|
+
typealias "uint32_t", "unsigned int"
|
41
|
+
typealias "uint64_t", "unsigned long long"
|
42
|
+
|
43
|
+
# lzma_ret enum
|
44
|
+
enum :LZMA_OK, 0, :LZMA_STREAM_END, 1, :LZMA_NO_CHECK, 2,
|
45
|
+
:LZMA_UNSUPPORTED_CHECK, 3, :LZMA_GET_CHECK, 4,
|
46
|
+
:LZMA_MEM_ERROR, 5, :LZMA_MEMLIMIT_ERROR, 6,
|
47
|
+
:LZMA_FORMAT_ERROR, 7, :LZMA_OPTIONS_ERROR, 8,
|
48
|
+
:LZMA_DATA_ERROR, 9, :LZMA_BUF_ERROR, 10,
|
49
|
+
:LZMA_PROG_ERROR, 11
|
50
|
+
|
51
|
+
# lzma_action enum
|
52
|
+
enum :LZMA_RUN, 0, :LZMA_SYNC_FLUSH, 1,
|
53
|
+
:LZMA_FULL_FLUSH, 2, :LZMA_FULL_BARRIER, 4,
|
54
|
+
:LZMA_FINISH, 3
|
33
55
|
|
34
56
|
# The maximum value of an uint64_t, as defined by liblzma.
|
35
57
|
# Should be the same as
|
@@ -39,54 +61,100 @@ module XZ
|
|
39
61
|
# Activates extreme compression. Same as xz's "-e" commandline switch.
|
40
62
|
LZMA_PRESET_EXTREME = 1 << 31
|
41
63
|
|
42
|
-
LZMA_TELL_NO_CHECK =
|
64
|
+
LZMA_TELL_NO_CHECK = 0x01
|
43
65
|
LZMA_TELL_UNSUPPORTED_CHECK = 0x02
|
44
66
|
LZMA_TELL_ANY_CHECK = 0x04
|
45
67
|
LZMA_CONCATENATED = 0x08
|
68
|
+
LZMA_IGNORE_CHECK = 0x10
|
46
69
|
|
47
70
|
# For access convenience of the above flags.
|
48
71
|
LZMA_DECODE_FLAGS = {
|
49
72
|
:tell_no_check => LZMA_TELL_NO_CHECK,
|
50
73
|
:tell_unsupported_check => LZMA_TELL_UNSUPPORTED_CHECK,
|
51
74
|
:tell_any_check => LZMA_TELL_ANY_CHECK,
|
52
|
-
:concatenated => LZMA_CONCATENATED
|
75
|
+
:concatenated => LZMA_CONCATENATED,
|
76
|
+
:ignore_check => LZMA_IGNORE_CHECK
|
53
77
|
}.freeze
|
54
78
|
|
55
79
|
# Placeholder enum used by liblzma for later additions.
|
56
|
-
|
57
|
-
|
58
|
-
#
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
#
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
80
|
+
enum :LZMA_RESERVED_ENUM, 0
|
81
|
+
|
82
|
+
# lzma_check enum
|
83
|
+
enum :LZMA_CHECK_NONE, 0, :LZMA_CHECK_CRC32, 1,
|
84
|
+
:LZMA_CHECK_CRC64, 4, :LZMA_CHECK_SHA256, 10
|
85
|
+
|
86
|
+
# Aliases for the enums as fiddle only understands plain int
|
87
|
+
typealias "lzma_ret", "int"
|
88
|
+
typealias "lzma_check", "int"
|
89
|
+
typealias "lzma_action", "int"
|
90
|
+
typealias "lzma_reserved_enum", "int"
|
91
|
+
|
92
|
+
# lzma_stream struct. When creating one with ::malloc, use
|
93
|
+
# ::LZMA_STREAM_INIT to make it ready for use.
|
94
|
+
#
|
95
|
+
# This is a Fiddle::CStruct. As such, this has a class method
|
96
|
+
# ::malloc for allocating an instance of it on the heap, and
|
97
|
+
# instances of it have a #to_ptr method that returns a
|
98
|
+
# Fiddle::Pointer. That pointer needs to be freed with
|
99
|
+
# Fiddle::free if the instance was created with ::malloc.
|
100
|
+
# To wrap an existing instance, call ::new with the
|
101
|
+
# Fiddle::Pointer to wrap as an argument.
|
102
|
+
LZMAStream = struct [
|
103
|
+
"uint8_t* next_in",
|
104
|
+
"size_t avail_in",
|
105
|
+
"uint64_t total_in",
|
106
|
+
"uint8_t* next_out",
|
107
|
+
"size_t avail_out",
|
108
|
+
"uint64_t total_out",
|
109
|
+
"void* allocator",
|
110
|
+
"void* internal",
|
111
|
+
"void* reserved_ptr1",
|
112
|
+
"void* reserved_ptr2",
|
113
|
+
"void* reserved_ptr3",
|
114
|
+
"void* reserved_ptr4",
|
115
|
+
"uint64_t reserved_int1",
|
116
|
+
"uint64_t reserved_int2",
|
117
|
+
"size_t reserved_int3",
|
118
|
+
"size_t reserved_int4",
|
119
|
+
"lzma_reserved_enum reserved_enum1",
|
120
|
+
"lzma_reserved_enum reserved_enum2"
|
121
|
+
]
|
122
|
+
|
123
|
+
# Ruby counterpart of liblzma's LZMA_STREAM_INIT macro. Resets every
# member of the given LZMAStream: pointer members become nil, integer
# members become 0 and the reserved enum members are set to
# LZMA_RESERVED_ENUM. Returns the stream that was passed in.
#
# Intended use:
#
#   stream = LibLZMA::LZMAStream.malloc # ::malloc is provided by fiddle
#   LibLZMA.LZMA_STREAM_INIT(stream)
#   # ...do something with the stream...
#   Fiddle.free(stream.to_ptr)
def self.LZMA_STREAM_INIT(stream)
  pointer_members = %i[next_in next_out allocator internal
                       reserved_ptr1 reserved_ptr2 reserved_ptr3 reserved_ptr4]
  integer_members = %i[avail_in total_in avail_out total_out
                       reserved_int1 reserved_int2 reserved_int3 reserved_int4]
  enum_members    = %i[reserved_enum1 reserved_enum2]

  pointer_members.each { |member| stream.public_send("#{member}=", nil) }
  integer_members.each { |member| stream.public_send("#{member}=", 0) }
  enum_members.each    { |member| stream.public_send("#{member}=", LZMA_RESERVED_ENUM) }

  stream
end
|
153
|
+
|
154
|
+
extern "lzma_ret lzma_easy_encoder(lzma_stream*, uint32_t, lzma_check)"
|
155
|
+
extern "lzma_ret lzma_code(lzma_stream*, lzma_action)"
|
156
|
+
extern "lzma_ret lzma_stream_decoder(lzma_stream*, uint64_t, uint32_t)"
|
157
|
+
extern "void lzma_end(lzma_stream*)"
|
90
158
|
|
91
159
|
end
|
92
160
|
|
@@ -95,71 +163,17 @@ module XZ
|
|
95
163
|
|
96
164
|
# Raises an exception of the receiving class, with a descriptive
# message, if +val+ is one of the liblzma error codes listed below.
# Any other value (the success codes included) is ignored and nil
# is returned.
def self.raise_if_necessary(val)
  case val
  when LibLZMA::LZMA_MEM_ERROR      then raise(self, "Couldn't allocate memory!")
  when LibLZMA::LZMA_MEMLIMIT_ERROR then raise(self, "Decoder ran out of (allowed) memory!")
  when LibLZMA::LZMA_FORMAT_ERROR   then raise(self, "Unrecognized file format!")
  when LibLZMA::LZMA_OPTIONS_ERROR  then raise(self, "Invalid options passed!")
  when LibLZMA::LZMA_DATA_ERROR     then raise(self, "Archive is corrupt.")
  when LibLZMA::LZMA_BUF_ERROR      then raise(self, "Buffer unusable!")
  when LibLZMA::LZMA_PROG_ERROR     then raise(self, "Program error--if you're sure your code is correct, you may have found a bug in liblzma.")
  end
end
|
108
176
|
|
109
177
|
end
|
110
178
|
|
111
|
-
# The main struct of the liblzma library.
|
112
|
-
class LZMAStream < FFI::Struct
|
113
|
-
layout :next_in, :pointer, #uint8
|
114
|
-
:avail_in, :size_t,
|
115
|
-
:total_in, :uint64,
|
116
|
-
:next_out, :pointer, #uint8
|
117
|
-
:avail_out, :size_t,
|
118
|
-
:total_out, :uint64,
|
119
|
-
:lzma_allocator, :pointer,
|
120
|
-
:lzma_internal, :pointer,
|
121
|
-
:reserved_ptr1, :pointer,
|
122
|
-
:reserved_ptr2, :pointer,
|
123
|
-
:reserved_ptr3, :pointer,
|
124
|
-
:reserved_ptr4, :pointer,
|
125
|
-
:reserved_int1, :uint64,
|
126
|
-
:reserved_int2, :uint64,
|
127
|
-
:reserved_int3, :size_t,
|
128
|
-
:reserved_int4, :size_t,
|
129
|
-
:reserved_enum1, :int,
|
130
|
-
:reserved_enum2, :int
|
131
|
-
|
132
|
-
# This method does basicly the same thing as the
|
133
|
-
# LZMA_STREAM_INIT macro of liblzma. Creates a new LZMAStream
|
134
|
-
# that has been initialized for usage. If any argument is passed,
|
135
|
-
# it is assumed to be a FFI::Pointer to a lzma_stream structure
|
136
|
-
# and that structure is wrapped.
|
137
|
-
def initialize(*args)
|
138
|
-
if !args.empty? #Got a pointer, want to wrap it
|
139
|
-
super
|
140
|
-
else
|
141
|
-
s = super()
|
142
|
-
s[:next_in] = nil
|
143
|
-
s[:avail_in] = 0
|
144
|
-
s[:total_in] = 0
|
145
|
-
s[:next_out] = nil
|
146
|
-
s[:avail_out] = 0
|
147
|
-
s[:total_out] = 0
|
148
|
-
s[:lzma_allocator] = nil
|
149
|
-
s[:lzma_internal] = nil
|
150
|
-
s[:reserved_ptr1] = nil
|
151
|
-
s[:reserved_ptr2] = nil
|
152
|
-
s[:reserved_ptr3] = nil
|
153
|
-
s[:reserved_ptr4] = nil
|
154
|
-
s[:reserved_int1] = 0
|
155
|
-
s[:reserved_int2] = 0
|
156
|
-
s[:reserved_int3] = 0
|
157
|
-
s[:reserved_int4] = 0
|
158
|
-
s[:reserved_enum1] = LibLZMA::LZMA_RESERVED_ENUM[:lzma_reserved_enum]
|
159
|
-
s[:reserved_enum2] = LibLZMA::LZMA_RESERVED_ENUM[:lzma_reserved_enum]
|
160
|
-
s
|
161
|
-
end
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
179
|
end
|
data/lib/xz/stream.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
#--
|
3
|
-
# (The MIT license)
|
4
|
-
#
|
5
3
|
# Basic liblzma-bindings for Ruby.
|
6
4
|
#
|
7
|
-
# Copyright ©
|
5
|
+
# Copyright © 2011-2018 Marvin Gülker et al.
|
6
|
+
#
|
7
|
+
# See AUTHORS for the full list of contributors.
|
8
8
|
#
|
9
9
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
10
10
|
# copy of this software and associated documentation files (the ‘Software’),
|
@@ -25,44 +25,441 @@
|
|
25
25
|
# THE SOFTWARE.
|
26
26
|
#++
|
27
27
|
|
28
|
-
# The base class for XZ::StreamReader and XZ::StreamWriter.
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
28
|
+
# The base class for XZ::StreamReader and XZ::StreamWriter. This is
|
29
|
+
# an abstract class that is not meant to be used directly. You can,
|
30
|
+
# however, test against this class in <tt>kind_of?</tt> tests.
|
31
|
+
#
|
32
|
+
# XZ::StreamReader and XZ::StreamWriter are IO-like classes that allow
|
33
|
+
# you to access XZ-compressed data the same way you access an
|
34
|
+
# IO-object, easily allowing to fool other libraries that expect IO
|
35
|
+
# objects. The most noticeable example for this may be reading and
|
36
|
+
# writing XZ-compressed tarballs using the minitar
|
37
|
+
# RubyGem; see the README.md file for an example.
|
38
|
+
#
|
39
|
+
# Most of IO's methods are implemented in this class or one of the
|
40
|
+
# subclasses. The most notable exception is that it is not possible
|
41
|
+
# to seek in XZ archives (#seek and #pos= are not defined).
|
42
|
+
# Many methods that are not expressly documented in the RDoc
|
43
|
+
# still exist; this class uses Ruby's Forwardable module to forward
|
44
|
+
# them to the underlying IO object.
|
33
45
|
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
38
|
-
#
|
39
|
-
#
|
46
|
+
# Stream and its subclasses honour Ruby's external+internal encoding
|
47
|
+
# system just like Ruby's own IO does. All of what the Ruby docs say
|
48
|
+
# about external and internal encodings applies to this class with one
|
49
|
+
# important difference. The "external encoding" does not refer to the
|
50
|
+
# encoding of the file on the hard disk (this file is always a binary
|
51
|
+
# file as it's compressed data), but to the encoding of the
|
52
|
+
# decompressed data inside the compressed file.
|
40
53
|
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
54
|
+
# As with Ruby's IO class, instances of this class and its subclasses
|
55
|
+
# default their external encoding to Encoding.default_external and
|
56
|
+
# their internal encoding to Encoding.default_internal. You can use
|
57
|
+
# #set_encoding or pass appropriate arguments to the +new+ method to
|
58
|
+
# change these encodings per-instance.
|
46
59
|
class XZ::Stream
|
47
|
-
|
60
|
+
extend Forwardable
|
61
|
+
|
62
|
+
def_delegator :@delegate_io, :"autoclose="
|
63
|
+
def_delegator :@delegate_io, :"autoclose?"
|
64
|
+
def_delegator :@delegate_io, :binmode
|
65
|
+
def_delegator :@delegate_io, :"binmode?"
|
66
|
+
def_delegator :@delegate_io, :"close_on_exec="
|
67
|
+
def_delegator :@delegate_io, :"close_on_exec?"
|
68
|
+
def_delegator :@delegate_io, :fcntl
|
69
|
+
def_delegator :@delegate_io, :fdatasync
|
70
|
+
def_delegator :@delegate_io, :fileno
|
71
|
+
def_delegator :@delegate_io, :to_i
|
72
|
+
def_delegator :@delegate_io, :flush # TODO: liblzma might have its own flush method that should be used
|
73
|
+
def_delegator :@delegate_io, :fsync
|
74
|
+
def_delegator :@delegate_io, :ioctl
|
75
|
+
def_delegator :@delegate_io, :isatty
|
76
|
+
def_delegator :@delegate_io, :pid
|
77
|
+
#def_delegator :@delegate_io, :stat # If this is available the minitar gem thinks it's a File and wants to seek it O_o
|
78
|
+
def_delegator :@delegate_io, :sync # TODO: use liblzma's own syncing functionality?
|
79
|
+
def_delegator :@delegate_io, :"sync=" # TODO: use liblzma's own syncing functionality?
|
80
|
+
def_delegator :@delegate_io, :"tty?"
|
81
|
+
|
82
|
+
# Like IO#lineno and IO#lineno=.
|
83
|
+
attr_accessor :lineno
|
84
|
+
|
85
|
+
# Returns the encoding used inside the compressed data stream.
|
86
|
+
# Like IO#external_encoding.
|
87
|
+
attr_reader :external_encoding
|
88
|
+
|
89
|
+
# When compressed data is read, the decompressed data is transcoded
|
90
|
+
# from the external_encoding to this encoding. If this encoding is
|
91
|
+
# nil, no transcoding happens.
|
92
|
+
attr_reader :internal_encoding
|
93
|
+
|
94
|
+
# Private API only for use by subclasses.
#
# Remembers +delegate_io+ as the IO all delegated calls go to,
# allocates and initialises a fresh lzma stream plus one input and
# one output buffer of XZ::CHUNK_SIZE bytes each, and sets the
# bookkeeping state (position, line number, encodings) to its
# defaults.
def initialize(delegate_io) # :nodoc:
  @delegate_io = delegate_io

  # Native resources
  @lzma_stream = XZ::LibLZMA::LZMAStream.malloc
  XZ::LibLZMA::LZMA_STREAM_INIT(@lzma_stream)
  @input_buffer_p  = Fiddle::Pointer.malloc(XZ::CHUNK_SIZE)
  @output_buffer_p = Fiddle::Pointer.malloc(XZ::CHUNK_SIZE)

  # Stream state
  @finished = false
  @pos      = 0
  @lineno   = 0

  # Encoding handling, same defaults as Ruby's IO
  @external_encoding = Encoding.default_external
  @internal_encoding = Encoding.default_internal
  @transcode_options = {}
end
|
109
|
+
|
110
|
+
# Pass the given +str+ into liblzma's lzma_code() function.
# +action+ is either LibLZMA::LZMA_RUN (still working) or
# LibLZMA::LZMA_FINISH (this is the last piece).
#
# +str+ is fed to liblzma in pieces of at most XZ::CHUNK_SIZE bytes.
# Every piece of output liblzma produces is yielded to the block as
# a string. +str+ is temporarily switched to binary encoding; its
# original encoding is restored before the method returns, even if
# an exception is raised.
def lzma_code(str, action) # :nodoc:
  previous_encoding = str.encoding
  str.force_encoding(Encoding::BINARY) # Need to operate on bytes now

  begin
    pos = 0
    until pos > str.bytesize # Do not use >=, that conflicts with #lzma_finish
      substr = str[pos, XZ::CHUNK_SIZE]

      # Copy exactly the bytes of this chunk. Using the size of the
      # whole string here would overrun the input buffer (which is
      # only XZ::CHUNK_SIZE bytes large) for inputs longer than one
      # chunk.
      @input_buffer_p[0, substr.bytesize] = substr
      pos += XZ::CHUNK_SIZE

      @lzma_stream.next_in  = @input_buffer_p
      @lzma_stream.avail_in = substr.bytesize

      # Keep calling lzma_code() for as long as it fills the output
      # buffer completely, as there may be more output pending.
      loop do
        @lzma_stream.next_out  = @output_buffer_p
        @lzma_stream.avail_out = XZ::CHUNK_SIZE
        res = XZ::LibLZMA.lzma_code(@lzma_stream.to_ptr, action)
        XZ.send :check_lzma_code_retval, res # call package-private method

        data = @output_buffer_p[0, XZ::CHUNK_SIZE - @lzma_stream.avail_out]
        yield(data)

        break unless @lzma_stream.avail_out == 0
      end
    end
  ensure
    str.force_encoding(previous_encoding)
  end
end
|
143
|
+
|
144
|
+
# Shared part of +rewind+; subclasses implement the rest.
#
# Finishes the current lzma stream, rewinds the delegate IO, resets
# position, line number and the finished flag, and allocates a new,
# initialised lzma stream (subclasses will configure it).
# Returns 0 to mimic IO#rewind's return value.
def rewind # :nodoc:
  # Order matters: the old stream is finished and the IO rewound
  # *before* the new stream is allocated. If the underlying IO is
  # not rewindable and raises, no new lzma stream has been allocated
  # yet that could then be leaked.
  finish
  @delegate_io.rewind

  # Reset internal state
  @finished = false
  @lineno   = 0
  @pos      = 0

  @lzma_stream = XZ::LibLZMA::LZMAStream.malloc
  XZ::LibLZMA::LZMA_STREAM_INIT(@lzma_stream)

  0
end
|
165
|
+
|
166
|
+
# Subclass instances can mostly be treated as if they were IO
# objects, hence this conversion simply returns the receiver. This
# class itself is abstract and should not be used directly.
def to_io
  self
end
|
174
|
+
|
175
|
+
# Abstract implementation; always raises IOError.
# StreamReader overrides this to behave like IO#eof?.
def eof?
  raise IOError, "Stream not opened for reading"
end
|
180
|
+
|
181
|
+
# Same as #eof?; implemented as a call so that subclasses overriding
# #eof? are honoured.
def eof
  eof?
end
|
185
|
+
|
186
|
+
# Reports whether the delegate IO has been closed, by asking the
# delegate IO itself.
def closed?
  @delegate_io.closed?
end
|
190
|
+
|
191
|
+
# Returns the finished flag, which is true once liblzma's internal
# memory has been freed. For writer instances a true value also
# means that all of liblzma's compressed data has been flushed to
# the underlying IO object.
def finished?
  @finished
end
|
198
|
+
|
199
|
+
# Free internal liblzma memory. This needs to be called before
# you leave this object for the GC. If you used a block-form
# initializer, this is done automatically for you.
#
# Subsequent calls to #read or #write will cause an IOError.
#
# Returns the underlying IO object. This allows you to retrieve
# the File instance that was automatically created when using
# the +open+ method's block form. Calling this method again on an
# already finished stream is a no-op that returns the underlying
# IO object as well.
def finish
  return @delegate_io if @finished

  # Clean up the lzma_stream structure's internal memory.
  # This would belong into a destructor if Ruby had that.
  XZ::LibLZMA.lzma_end(@lzma_stream)
  @finished = true

  @delegate_io
end
|
218
|
+
|
219
|
+
|
220
|
+
# Calls #finish unless the stream is already finished, then closes
# the delegate IO unless it is already closed. Closing the delegate
# IO makes it flush its buffer, so after this method returns all
# pending data has been flushed to the OS' kernel. Returns nil.
def close
  finish if !@finished

  io = @delegate_io
  io.close if !io.closed?

  nil
end
|
229
|
+
|
230
|
+
# XZ streams are never duplex, so there is no read half to close;
# this always raises IOError.
def close_read
  raise IOError, "Not a duplex I/O stream"
end
|
234
|
+
|
235
|
+
# XZ streams are never duplex, so there is no write half to close;
# this always raises IOError.
def close_write
  raise IOError, "Not a duplex I/O stream"
end
|
239
|
+
|
240
|
+
# Abstract implementation; accepts any arguments and always raises
# IOError. StreamReader overrides this to behave like IO#read.
def read(*args)
  raise IOError, "Stream not opened for reading"
end
|
245
|
+
|
246
|
+
# Abstract implementation; accepts any arguments and always raises
# IOError. StreamWriter overrides this to behave like IO#write.
def write(*args)
  raise IOError, "Stream not opened for writing"
end
|
251
|
+
|
252
|
+
# Current position in the *decompressed* data. This holds for
# reader and writer instances alike. Also available as #tell.
def pos
  @pos
end
alias tell pos
|
258
|
+
|
259
|
+
# Like IO#set_encoding. Accepts one to three arguments:
#
# * the external encoding, or a string of the form "ext:int",
# * optionally the internal encoding,
# * optionally a trailing Hash that is stored as transcode options.
#
# Encodings may be given as Encoding objects or as names understood
# by Encoding.find. Without an internal encoding,
# Encoding.default_internal is used. Raises ArgumentError for any
# other number of arguments. Returns the receiver.
def set_encoding(*args)
  unless (1..3).cover?(args.count)
    raise ArgumentError, "Wrong number of arguments: Expected 1-3, got #{args.count}"
  end

  # "ext:int" shorthand: split at the first colon and start over
  # with the two halves as separate arguments.
  spec = args.first
  if spec.respond_to?(:to_str) && spec.to_str.include?(":")
    external, _colon, internal = spec.to_str.partition(":")
    return set_encoding(external, internal, *args.drop(1))
  end

  @transcode_options = args.pop if args.last.kind_of?(Hash)

  # `args' is always [external, internal] or [external] at this point
  resolve = ->(enc) { enc.kind_of?(Encoding) ? enc : Encoding.find(enc) }
  @external_encoding = resolve.call(args[0])
  @internal_encoding =
    if args[1]
      resolve.call(args[1])
    else
      Encoding.default_internal # Encoding.default_internal defaults to nil
    end

  self
end
|
280
|
+
|
281
|
+
# Do not define #pos= and #seek, not even to throw NotImplementedError.
|
282
|
+
# Reason: The minitar gem thinks it can use these methods then and provokes
|
283
|
+
# the NotImplementedError exception.
|
284
|
+
|
285
|
+
# Like IO#<<. Converts +obj+ with #to_s, hands the result to #write
# and returns whatever #write returns.
def <<(obj)
  text = obj.to_s
  write(text)
end
|
289
|
+
|
290
|
+
# Like IO#advise. No-op, because not meaningful on compressed data.
# Any arguments (IO#advise takes an advice, an offset and a length)
# are accepted and ignored so that callers treating this object
# like an IO do not get an ArgumentError. Returns nil.
def advise(*_args)
  nil
end
|
294
|
+
|
295
|
+
# Like IO#getbyte. Returns nil at EOF. Not exactly performant: a
# one-byte string is read via #read, and the byte value then has to
# be extracted from that string again.
def getbyte
  eof? ? nil : read(1).bytes[0]
end
|
302
|
+
|
303
|
+
# Like IO#readbyte: same as #getbyte, but raises EOFError instead of
# returning nil.
def readbyte
  byte = getbyte
  raise(EOFError, "End of stream reached") unless byte

  byte
end
|
307
|
+
|
308
|
+
# Like IO#getc. Returns the next character of the decompressed
# data, or nil at EOF (which is what #readchar and #each_char rely
# on to detect the end of the stream).
#
# The character is assembled in the external encoding and, if an
# internal encoding is set, transcoded to it before being returned.
def getc
  return nil if eof?

  str = String.new

  # Read byte-by-byte until a valid character in the external
  # encoding was built. If EOF is hit in the middle of a character,
  # the incomplete bytes read so far are used as they are.
  loop do
    str.force_encoding(Encoding::BINARY)
    str << read(1)
    str.force_encoding(@external_encoding)

    break if str.valid_encoding? || eof?
  end

  # Transcode to internal encoding if one was requested
  if @internal_encoding
    str.encode(@internal_encoding)
  else
    str
  end
end
|
329
|
+
|
330
|
+
# Like IO#readchar: same as #getc, but raises EOFError when #getc
# yields nothing.
def readchar
  char = getc
  raise(EOFError, "End of stream reached") unless char

  char
end
|
334
|
+
|
335
|
+
# Like IO#gets. Reads characters via #getc until +separator+ has
# been read, +limit+ characters have been collected, or EOF is
# reached, and returns them (separator included) as a string in the
# target encoding. Returns nil if the stream is at EOF already.
# Increments #lineno for every line returned.
#
# +separator+ may consist of more than one character (e.g. "\r\n").
# A nil or empty separator never terminates a line; in that case
# everything up to +limit+ or EOF is returned.
def gets(separator = $/, limit = nil)
  return nil if eof?
  @lineno += 1

  # Mirror IO#gets' weird call-seq
  if separator.respond_to?(:to_int)
    limit = separator.to_int
    separator = $/
  end

  terminator = separator.respond_to?(:to_str) ? separator.to_str : nil
  terminator = nil if terminator && terminator.empty?

  buf = String.new
  buf.force_encoding(target_encoding)
  until eof? || (limit && buf.length >= limit)
    buf << getc

    # Compare the tail of the buffer against the whole separator so
    # that multi-character separators work. Slicing plus == is used
    # (rather than end_with?) as it never raises on incompatible
    # encodings; a too-short buffer slices to nil and compares false.
    if terminator && buf[-terminator.length, terminator.length] == terminator
      return buf
    end
  end

  buf
end
|
355
|
+
|
356
|
+
# Like IO#readline: forwards all arguments to #gets and raises
# EOFError when #gets yields nothing.
def readline(*args)
  line = gets(*args)
  raise(EOFError, "End of stream reached") unless line

  line
end
|
360
|
+
|
361
|
+
# Like IO#each. Yields every line obtained from #gets, passing
# +args+ (separator and/or limit) on to it. Without a block an
# enumerator is returned which uses the same +args+ once iterated.
# Also available as #each_line.
def each(*args)
  # Hand the arguments to the enumerator as well; otherwise the
  # blockless form would silently fall back to #gets' defaults.
  return enum_for(__method__, *args) unless block_given?

  while (line = gets(*args))
    yield(line)
  end
end
alias each_line each
|
370
|
+
|
371
|
+
# Like IO#each_byte. Yields each value obtained from #getbyte until
# it returns nothing. Returns an enumerator when called without a
# block.
def each_byte
  return enum_for(__method__) unless block_given?

  byte = getbyte
  while byte
    yield(byte)
    byte = getbyte
  end
end
|
379
|
+
|
380
|
+
# Like IO#each_char. Yields each value obtained from #getc until it
# returns nothing. Returns an enumerator when called without a
# block.
def each_char
  return enum_for(__method__) unless block_given?

  char = getc
  while char
    yield(char)
    char = getc
  end
end
|
388
|
+
|
389
|
+
# Like IO#each_codepoint. Iterates with #each_char and yields the
# ordinal (String#ord) of every character. Returns an enumerator
# when called without a block.
def each_codepoint
  return enum_for(__method__) unless block_given?

  each_char do |char|
    yield(char.ord)
  end
end
|
395
|
+
|
396
|
+
# Like IO#printf. Formats the arguments with sprintf, writes the
# resulting string with #write and returns nil.
def printf(*args)
  formatted = sprintf(*args)
  write(formatted)

  nil
end
|
401
|
+
|
402
|
+
# Like IO#putc. Writes <tt>obj.chr</tt> if +obj+ responds to #chr,
# otherwise <tt>obj.to_str</tt> if it responds to #to_str. Raises
# TypeError for anything else. Returns what #write returns.
def putc(obj)
  return write(obj.chr) if obj.respond_to?(:chr)
  return write(obj.to_str) if obj.respond_to?(:to_str)

  raise(TypeError, "Can only #putc strings and numbers")
end
|
412
|
+
|
413
|
+
# Like IO#puts. Writes every object followed by a newline, unless
# its string form already ends with one. Objects responding to
# #to_ary are expanded recursively. Without arguments, a single
# newline is written. Returns nil.
def puts(*objs)
  if objs.empty?
    write("\n")
    return nil
  end

  objs.each do |obj|
    if obj.respond_to?(:to_ary)
      puts(*obj.to_ary)
      next
    end

    # Don't squeeze multiple subsequent trailing newlines in `obj'
    line = obj.to_s
    newline = "\n".encode(line.encoding)
    line += newline unless line.end_with?(newline)
    write(line)
  end

  nil
end
|
434
|
+
|
435
|
+
# Like IO#print. Writes the string form of every given object. If
# the output field separator <tt>$,</tt> is set, it is written
# *between* the objects (not after the last one). If the output
# record separator <tt>$\\</tt> is set, it is written at the end.
# Without arguments, <tt>$_</tt> is written. Returns nil.
def print(*objs)
  if objs.empty?
    # NOTE(review): $_ is local to this method's frame, so it does
    # not see the caller's last-read line -- confirm intended use.
    write($_)
  else
    objs.each_with_index do |obj, index|
      write($,) if $, && index > 0
      write(obj.to_s)
    end
  end

  write($\) if $\
  nil
end
|
48
449
|
|
49
|
-
#
|
50
|
-
#
|
51
|
-
def
|
52
|
-
|
53
|
-
@lzma_stream = XZ::LZMAStream.new
|
450
|
+
# An lzma stream cannot be reopened. This method accepts any
# arguments and always raises NotImplementedError.
def reopen(*args)
  raise NotImplementedError, "Can't reopen an lzma stream"
end
|
55
455
|
|
56
456
|
private
|
57
457
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
# I benchmarked it, and it is as twice as fast.
|
62
|
-
if str.respond_to? :force_encoding
|
63
|
-
str.dup.force_encoding(Encoding::BINARY).size
|
458
|
+
# The encoding strings handed to the user end up in: the internal
# encoding if one is set, the external encoding otherwise.
def target_encoding
  @internal_encoding || @external_encoding
end
|
68
465
|
|