omnizip 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +243 -368
- data/README.adoc +101 -5
- data/docs/guides/archive-formats/index.adoc +31 -1
- data/docs/guides/archive-formats/ole-format.adoc +316 -0
- data/docs/guides/archive-formats/rpm-format.adoc +249 -0
- data/docs/index.adoc +12 -2
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +29 -18
- data/lib/omnizip/algorithms/lzma/encoder.rb +2 -1
- data/lib/omnizip/algorithms/lzma/length_coder.rb +6 -3
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +2 -1
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +40 -13
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +36 -2
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +19 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +2 -1
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +148 -112
- data/lib/omnizip/algorithms/lzma.rb +20 -5
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +25 -21
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +4 -11
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +2 -1
- data/lib/omnizip/algorithms/xz_lzma2.rb +2 -1
- data/lib/omnizip/algorithms/zstandard/constants.rb +125 -9
- data/lib/omnizip/algorithms/zstandard/decoder.rb +202 -17
- data/lib/omnizip/algorithms/zstandard/encoder.rb +197 -17
- data/lib/omnizip/algorithms/zstandard/frame/block.rb +128 -0
- data/lib/omnizip/algorithms/zstandard/frame/header.rb +224 -0
- data/lib/omnizip/algorithms/zstandard/fse/bitstream.rb +186 -0
- data/lib/omnizip/algorithms/zstandard/fse/encoder.rb +325 -0
- data/lib/omnizip/algorithms/zstandard/fse/table.rb +269 -0
- data/lib/omnizip/algorithms/zstandard/huffman.rb +272 -0
- data/lib/omnizip/algorithms/zstandard/huffman_encoder.rb +339 -0
- data/lib/omnizip/algorithms/zstandard/literals.rb +178 -0
- data/lib/omnizip/algorithms/zstandard/literals_encoder.rb +251 -0
- data/lib/omnizip/algorithms/zstandard/sequences.rb +346 -0
- data/lib/omnizip/buffer/memory_extractor.rb +3 -3
- data/lib/omnizip/buffer.rb +2 -2
- data/lib/omnizip/filters/delta.rb +2 -1
- data/lib/omnizip/filters/registry.rb +6 -6
- data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
- data/lib/omnizip/formats/lzip.rb +2 -1
- data/lib/omnizip/formats/lzma_alone.rb +2 -1
- data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
- data/lib/omnizip/formats/ole/constants.rb +61 -0
- data/lib/omnizip/formats/ole/dirent.rb +380 -0
- data/lib/omnizip/formats/ole/header.rb +198 -0
- data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
- data/lib/omnizip/formats/ole/storage.rb +305 -0
- data/lib/omnizip/formats/ole/types/variant.rb +328 -0
- data/lib/omnizip/formats/ole.rb +145 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +92 -49
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +13 -20
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +6 -2
- data/lib/omnizip/formats/rar3/reader.rb +6 -2
- data/lib/omnizip/formats/rar5/reader.rb +4 -1
- data/lib/omnizip/formats/rpm/constants.rb +58 -0
- data/lib/omnizip/formats/rpm/entry.rb +102 -0
- data/lib/omnizip/formats/rpm/header.rb +113 -0
- data/lib/omnizip/formats/rpm/lead.rb +122 -0
- data/lib/omnizip/formats/rpm/tag.rb +230 -0
- data/lib/omnizip/formats/rpm.rb +434 -0
- data/lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb +239 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +32 -8
- data/lib/omnizip/formats/seven_zip/constants.rb +1 -1
- data/lib/omnizip/formats/seven_zip/reader.rb +84 -8
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +2 -1
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +6 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +21 -9
- data/lib/omnizip/formats/seven_zip.rb +10 -0
- data/lib/omnizip/formats/xar/entry.rb +18 -5
- data/lib/omnizip/formats/xar/header.rb +34 -6
- data/lib/omnizip/formats/xar/reader.rb +43 -10
- data/lib/omnizip/formats/xar/toc.rb +34 -21
- data/lib/omnizip/formats/xar/writer.rb +15 -5
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +45 -33
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +2 -1
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +3 -1
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +2 -1
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +4 -3
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +14 -6
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +2 -1
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +28 -13
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +13 -6
- data/lib/omnizip/pipe/stream_compressor.rb +1 -1
- data/lib/omnizip/version.rb +1 -1
- data/readme-docs/compression-algorithms.adoc +6 -2
- metadata +30 -2
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "constants"
|
|
24
|
+
require_relative "fse/bitstream"
|
|
25
|
+
require_relative "fse/table"
|
|
26
|
+
|
|
27
|
+
module Omnizip
|
|
28
|
+
module Algorithms
|
|
29
|
+
class Zstandard
|
|
30
|
+
# Sequences section decoder (RFC 8878 Section 3.1.1.3.2)
|
|
31
|
+
#
|
|
32
|
+
# Decodes sequences of (literals_length, match_length, offset)
|
|
33
|
+
# which are then executed to produce the decompressed output.
|
|
34
|
+
class SequencesDecoder
|
|
35
|
+
include Constants
|
|
36
|
+
|
|
37
|
+
# @return [Array<Hash>] Decoded sequences
|
|
38
|
+
attr_reader :sequences
|
|
39
|
+
|
|
40
|
+
# Convenience constructor: build a decoder and run it in one call
#
# @param input [IO] Input stream positioned at sequences section
# @param literals_size [Integer] Size of decoded literals
# @param previous_tables [Hash] Previous FSE tables for REPEAT mode
# @return [SequencesDecoder] Decoder whose section has already been decoded
def self.decode(input, literals_size, previous_tables = {})
  new(input, literals_size, previous_tables).tap(&:decode_section)
end
|
|
51
|
+
|
|
52
|
+
# Set up decoder state; nothing is read from the input yet
#
# @param input [IO] Input stream
# @param literals_size [Integer] Size of decoded literals
# @param previous_tables [Hash] Previous FSE tables
def initialize(input, literals_size, previous_tables = {})
  # Results and per-block tables start out empty
  @sequences = []
  @fse_tables = {}

  # Caller-supplied context
  @input = input
  @literals_size = literals_size
  @previous_tables = previous_tables
end
|
|
64
|
+
|
|
65
|
+
# Decode the whole sequences section into @sequences
#
# Reads the sequence count first; when it is zero nothing else is
# consumed from the input. Otherwise the compression-modes byte is
# read, the FSE tables are prepared and the sequences are decoded.
#
# @return [void]
def decode_section
  total = read_sequence_count
  return if total.zero?

  build_fse_tables(read_symbol_modes)
  decode_sequences_internal(total)
end
|
|
83
|
+
|
|
84
|
+
private
|
|
85
|
+
|
|
86
|
+
# Read Number_of_Sequences (1-3 bytes, RFC 8878 Section 3.1.1.3.2.1)
#
# Encoding, keyed on the first byte (byte0):
# - byte0 < 128:  count = byte0                              (1 byte)
# - byte0 < 255:  count = ((byte0 - 0x80) << 8) + byte1      (2 bytes)
# - byte0 == 255: count = byte1 + (byte2 << 8) + 0x7F00      (3 bytes)
#
# @return [Integer] Number of sequences in this block
def read_sequence_count
  byte0 = @input.read(1).ord
  return byte0 if byte0 < 0x80

  byte1 = @input.read(1).ord
  return ((byte0 - 0x80) << 8) + byte1 if byte0 < 0xFF

  # Long form: 16-bit little-endian value biased by 0x7F00
  byte2 = @input.read(1).ord
  byte1 + (byte2 << 8) + 0x7F00
end
|
|
99
|
+
|
|
100
|
+
# Read the one-byte compression-mode header for LL, OF and ML
#
# Each mode is a 2-bit field: literals length in bits 7-6, offset in
# bits 5-4 and match length in bits 3-2. The low two bits are ignored.
#
# @return [Hash{Symbol => Integer}] Modes keyed by :ll, :of, :ml
def read_symbol_modes
  packed = @input.read(1).ord
  field_shifts = { ll: 6, of: 4, ml: 2 }

  field_shifts.transform_values { |shift| (packed >> shift) & 0x03 }
end
|
|
110
|
+
|
|
111
|
+
# Build the three FSE tables selected by the compression modes
#
# RLE and FSE modes read their table description from @input, so the
# build order must follow the stream layout, which per RFC 8878
# Section 3.1.1.3.2.1 is: literals length, then offset, then match
# length.
#
# @param modes [Hash{Symbol => Integer}] Modes keyed by :ll, :of, :ml
# @return [Hash] The populated table set
def build_fse_tables(modes)
  %i[ll of ml].each do |type|
    @fse_tables[type] = build_fse_table(modes[type], type)
  end

  @fse_tables
end
|
|
117
|
+
|
|
118
|
+
# Produce the FSE table for one symbol type according to its mode
#
# REPEAT reuses the table handed in through previous_tables and falls
# back to the predefined table when none is available. A mode that
# matches none of the four constants yields nil.
#
# @param mode [Integer] Compression mode for this symbol type
# @param type [Symbol] :ll, :ml or :of
# @return [Object, nil] The table to decode with
def build_fse_table(mode, type)
  case mode
  when MODE_PREDEFINED then build_predefined_table(type)
  when MODE_RLE then build_rle_table(type)
  when MODE_FSE then build_fse_table_from_stream(type)
  when MODE_REPEAT
    carried_over = @previous_tables[type]
    carried_over || build_predefined_table(type)
  end
end
|
|
131
|
+
|
|
132
|
+
# Build the predefined (default-distribution) FSE table for a type
#
# @param type [Symbol] :ll, :ml or :of
# @return [Object, nil] Table from FSE::Table.build_predefined, or nil
#   for an unrecognised type
def build_predefined_table(type)
  distribution, accuracy_log =
    case type
    when :ll then [PREDEFINED_LL_DISTRIBUTION, LITERALS_LENGTH_ACCURACY_LOG]
    when :ml then [PREDEFINED_ML_DISTRIBUTION, MATCH_LENGTH_ACCURACY_LOG]
    when :of then [PREDEFINED_OFFSET_DISTRIBUTION, OFFSET_ACCURACY_LOG]
    end
  return nil if distribution.nil?

  FSE::Table.build_predefined(distribution.to_a, accuracy_log)
end
|
|
146
|
+
|
|
147
|
+
# Build an FSE table for RLE mode (one symbol used for every sequence)
#
# Consumes one byte from @input: the repeated symbol. The table is
# built from a distribution in which that symbol owns every slot
# (1 << accuracy_log) and all other symbols have weight zero.
#
# @param type [Symbol] :ll, :ml or :of
# @return [Object] Table from FSE::Table.build
def build_rle_table(type)
  rle_symbol = @input.read(1).ord
  accuracy_log = accuracy_log_for_type(type)

  weights = Array.new(symbol_count_for_type(type), 0)
  weights[rle_symbol] = 1 << accuracy_log

  FSE::Table.build(weights, accuracy_log)
end
|
|
158
|
+
|
|
159
|
+
# Build an FSE table for FSE_Compressed mode
#
# NOTE(review): this is a placeholder. It consumes a single byte from
# @input and discards it, then returns the predefined table; the
# compressed distribution that follows in the stream is not parsed.
#
# @param type [Symbol] :ll, :ml or :of
# @return [Object] The predefined table for the type
def build_fse_table_from_stream(type)
  _header_byte = @input.read(1).ord

  build_predefined_table(type)
end
|
|
168
|
+
|
|
169
|
+
# Decode `count` sequences from the rest of the input
#
# Everything left in @input is handed to the bitstream reader. One FSE
# decoder is created per available table; a missing table makes the
# corresponding symbol decode as 0. Results are appended to @sequences.
#
# NOTE(review): states are initialised and symbols/extra bits are read
# in LL, ML, OF order here; RFC 8878 describes a different interleaving
# (states LL, OF, ML; extra bits OF, ML, LL) - confirm against
# FSE::Decoder before relying on this for foreign streams.
#
# @param count [Integer] Number of sequences to decode
# @return [Integer, nil] nil when count is zero
def decode_sequences_internal(count)
  return if count.zero?

  # The bitstream covers whatever remains of the block
  bitstream = FSE::BitStream.new(@input.read)

  # One decoder per symbol type, nil where no table exists
  decoders = %i[ll ml of].map do |type|
    table = @fse_tables[type]
    table ? FSE::Decoder.new(table) : nil
  end

  decoders.each { |decoder| decoder&.init_state(bitstream) }

  count.times do
    ll_symbol, ml_symbol, of_symbol = decoders.map do |decoder|
      decoder ? decoder.decode(bitstream) : 0
    end

    # Symbols are codes; turn them into real lengths / offset values
    @sequences << {
      literals_length: decode_literal_length(ll_symbol, bitstream),
      match_length: decode_match_length(ml_symbol, bitstream),
      offset: decode_offset(of_symbol, bitstream),
    }
  end
end
|
|
205
|
+
|
|
206
|
+
# Turn a literals-length code into the actual literals length
#
# The code indexes LITERAL_LENGTH_TABLE, which yields a baseline and a
# number of extra bits to read from the bitstream and add on top.
# Missing or out-of-table codes decode as 0.
#
# @param symbol [Integer, nil] Literals-length code
# @param bitstream [#read_bits] Source of the extra bits
# @return [Integer] Literals length
def decode_literal_length(symbol, bitstream)
  usable = !symbol.nil? && !symbol.negative? &&
           symbol < LITERAL_LENGTH_TABLE.length
  return 0 unless usable

  base, bit_count = LITERAL_LENGTH_TABLE[symbol]
  bit_count.zero? ? base : base + bitstream.read_bits(bit_count)
end
|
|
216
|
+
|
|
217
|
+
# Turn a match-length code into the actual match length
#
# The code indexes MATCH_LENGTH_TABLE, which yields a baseline and a
# number of extra bits to read from the bitstream and add on top.
# Missing or out-of-table codes decode as 3.
#
# @param symbol [Integer, nil] Match-length code
# @param bitstream [#read_bits] Source of the extra bits
# @return [Integer] Match length
def decode_match_length(symbol, bitstream)
  usable = !symbol.nil? && !symbol.negative? &&
           symbol < MATCH_LENGTH_TABLE.length
  return 3 unless usable

  base, bit_count = MATCH_LENGTH_TABLE[symbol]
  bit_count.zero? ? base : base + bitstream.read_bits(bit_count)
end
|
|
227
|
+
|
|
228
|
+
# Turn an offset code into an Offset_Value (RFC 8878 Section 3.1.1.3.2.1.1)
#
# The offset code is the number of extra bits to read:
#
#   Offset_Value = (1 << code) + read_bits(code)
#
# The returned value is still encoded: 1-3 select a repeat offset and
# anything larger is the real offset plus 3. Translating it into a
# distance is SequenceExecutor's job, so no "- 3" is applied here
# (doing so would subtract it twice).
#
# A missing or negative code yields 0, mirroring how the sibling
# length decoders tolerate invalid codes.
#
# @param symbol [Integer, nil] Offset code from the FSE decoder
# @param bitstream [#read_bits] Source of the extra bits
# @return [Integer] Offset_Value (0 when the code is unusable)
def decode_offset(symbol, bitstream)
  return 0 if symbol.nil? || symbol.negative?

  # Code 0 carries no extra bits; avoid a zero-width read
  extra = symbol.zero? ? 0 : bitstream.read_bits(symbol)
  (1 << symbol) + extra
end
|
|
237
|
+
|
|
238
|
+
# Number of distinct symbols for a symbol type
#
# @param type [Symbol] :ll, :ml or :of
# @return [Integer, nil] Symbol count, or nil for an unknown type
def symbol_count_for_type(type)
  if type == :ll
    LITERAL_LENGTH_TABLE.length
  elsif type == :ml
    MATCH_LENGTH_TABLE.length
  elsif type == :of
    32
  end
end
|
|
246
|
+
|
|
247
|
+
# Accuracy log constant associated with a symbol type
#
# @param type [Symbol] :ll, :ml or :of
# @return [Integer, nil] Accuracy log, or nil for an unknown type
def accuracy_log_for_type(type)
  if type == :ll
    LITERALS_LENGTH_ACCURACY_LOG
  elsif type == :ml
    MATCH_LENGTH_ACCURACY_LOG
  elsif type == :of
    OFFSET_ACCURACY_LOG
  end
end
|
|
255
|
+
end
|
|
256
|
+
|
|
257
|
+
# Sequence executor (RFC 8878 Section 3.1.2.2.3)
#
# Executes decoded sequences to produce output. Each sequence copies
# `literals_length` bytes from the literals buffer and then
# `match_length` bytes from already-produced output, `offset` bytes
# back. Offsets arrive as Offset_Values: 1-3 refer to the history of
# recently used offsets, larger values are the real offset plus 3.
class SequenceExecutor
  include Constants

  # Execute sequences to produce decompressed output
  #
  # @param literals [String] Decoded literals
  # @param sequences [Array<Hash>] Decoded sequences
  # @return [String] Decompressed output
  def self.execute(literals, sequences)
    new.execute(literals, sequences)
  end

  # Initialize with default repeat offsets
  def initialize
    @repeat_offsets = DEFAULT_REPEAT_OFFSETS.dup
  end

  # Execute sequences
  #
  # Literal runs that overshoot the literals buffer are truncated, and
  # matches whose offset is not positive or reaches before the start
  # of the output are skipped rather than raising.
  #
  # @param literals [String] Decoded literals
  # @param sequences [Array<Hash>] Decoded sequences, each with
  #   :literals_length, :match_length and :offset (missing keys count as 0)
  # @return [String] Decompressed output (binary encoding)
  def execute(literals, sequences)
    output = String.new(encoding: Encoding::BINARY)
    lit_pos = 0

    sequences.each do |seq|
      ll = seq[:literals_length] || 0
      ml = seq[:match_length] || 0
      offset_code = seq[:offset] || 0

      # Copy literals
      if ll.positive? && lit_pos < literals.length
        copy_len = [ll, literals.length - lit_pos].min
        output << literals.slice(lit_pos, copy_len)
        lit_pos += copy_len
      end

      next unless ml.positive?

      # The declared literals length (not the amount actually copied)
      # decides how repeat-offset codes are interpreted.
      offset = resolve_offset(offset_code, ll)
      next unless offset.positive? && offset <= output.length

      append_match(output, offset, ml)
    end

    # Copy remaining literals (last sequence has no match)
    output << literals.slice(lit_pos..-1) if lit_pos < literals.length

    output
  end

  private

  # Append `length` bytes starting `offset` bytes before the end of output
  #
  # When length exceeds offset the source overlaps the bytes being
  # written, which makes the result the last `offset` bytes repeated
  # periodically; that repetition is built directly instead of byte by
  # byte.
  #
  # @param output [String] Output buffer, appended to in place
  # @param offset [Integer] Distance back from the end (1..output.length)
  # @param length [Integer] Number of bytes to append
  # @return [String] The output buffer
  def append_match(output, offset, length)
    period = output.slice(-offset, offset)
    repeats = (length + offset - 1) / offset

    output << (period * repeats).slice(0, length)
  end

  # Resolve an Offset_Value to an actual offset and update the history
  #
  # Rules from RFC 8878 Section 3.1.1.5:
  # - value > 3: offset is value - 3 and becomes the most recent entry.
  # - value 1..3: selects repeat offset 1..3; when the sequence has no
  #   literals the selection shifts by one, so 1 -> offset 2,
  #   2 -> offset 3 and 3 -> (offset 1) - 1.
  # - Using the most recent offset leaves the history untouched, using
  #   the second swaps it with the first, anything else moves to the
  #   front and pushes the others back.
  #
  # Non-positive values (and a shifted code 3 that would produce 0) are
  # invalid; they return 0 and leave the history alone so the caller
  # skips the match.
  #
  # @param code [Integer] Offset_Value from the sequence
  # @param literals_length [Integer] Literals length of the same sequence
  # @return [Integer] Actual offset, or 0 when unusable
  def resolve_offset(code, literals_length = 1)
    return 0 unless code.positive?
    return remember_offset(code - 3) if code > 3

    slot = literals_length.zero? ? code : code - 1
    case slot
    when 0
      @repeat_offsets[0]
    when 1
      @repeat_offsets[0], @repeat_offsets[1] =
        @repeat_offsets[1], @repeat_offsets[0]
      @repeat_offsets[0]
    when 2
      remember_offset(@repeat_offsets[2])
    else
      candidate = @repeat_offsets[0] - 1
      candidate.positive? ? remember_offset(candidate) : 0
    end
  end

  # Put an offset at the front of the history, dropping the oldest entry
  #
  # @param offset [Integer] Offset that was just used
  # @return [Integer] The same offset
  def remember_offset(offset)
    @repeat_offsets = [offset, @repeat_offsets[0], @repeat_offsets[1]]
    offset
  end
end
|
|
344
|
+
end
|
|
345
|
+
end
|
|
346
|
+
end
|
|
@@ -48,7 +48,7 @@ module Omnizip
|
|
|
48
48
|
case @format
|
|
49
49
|
when :zip
|
|
50
50
|
extract_all_zip(result)
|
|
51
|
-
when :seven_zip, :
|
|
51
|
+
when :seven_zip, :"7z"
|
|
52
52
|
raise NotImplementedError, "7z format support coming in Phase 2"
|
|
53
53
|
else
|
|
54
54
|
raise ArgumentError, "Unsupported format: #{@format}"
|
|
@@ -75,7 +75,7 @@ module Omnizip
|
|
|
75
75
|
case @format
|
|
76
76
|
when :zip
|
|
77
77
|
content = extract_entry_zip(name)
|
|
78
|
-
when :seven_zip, :
|
|
78
|
+
when :seven_zip, :"7z"
|
|
79
79
|
raise NotImplementedError, "7z format support coming in Phase 2"
|
|
80
80
|
else
|
|
81
81
|
raise ArgumentError, "Unsupported format: #{@format}"
|
|
@@ -99,7 +99,7 @@ module Omnizip
|
|
|
99
99
|
case @format
|
|
100
100
|
when :zip
|
|
101
101
|
list_entries_zip(names)
|
|
102
|
-
when :seven_zip, :
|
|
102
|
+
when :seven_zip, :"7z"
|
|
103
103
|
raise NotImplementedError, "7z format support coming in Phase 2"
|
|
104
104
|
else
|
|
105
105
|
raise ArgumentError, "Unsupported format: #{@format}"
|
data/lib/omnizip/buffer.rb
CHANGED
|
@@ -51,7 +51,7 @@ module Omnizip
|
|
|
51
51
|
case format
|
|
52
52
|
when :zip
|
|
53
53
|
create_zip(buffer, options, &block)
|
|
54
|
-
when :seven_zip, :
|
|
54
|
+
when :seven_zip, :"7z"
|
|
55
55
|
raise NotImplementedError, "7z format support coming in Phase 2"
|
|
56
56
|
else
|
|
57
57
|
raise ArgumentError, "Unsupported format: #{format}"
|
|
@@ -82,7 +82,7 @@ module Omnizip
|
|
|
82
82
|
case format
|
|
83
83
|
when :zip
|
|
84
84
|
open_zip(buffer, &block)
|
|
85
|
-
when :seven_zip, :
|
|
85
|
+
when :seven_zip, :"7z"
|
|
86
86
|
raise NotImplementedError, "7z format support coming in Phase 2"
|
|
87
87
|
else
|
|
88
88
|
raise ArgumentError, "Unsupported format: #{format}"
|
|
@@ -63,7 +63,8 @@ module Omnizip
|
|
|
63
63
|
# - 3: RGB image data (24-bit)
|
|
64
64
|
# - 4: RGBA image data (32-bit) or 32-bit integers
|
|
65
65
|
# @raise [ArgumentError] If distance is invalid
|
|
66
|
-
def initialize(distance_arg = DEFAULT_DISTANCE,
|
|
66
|
+
def initialize(distance_arg = DEFAULT_DISTANCE,
|
|
67
|
+
distance: DEFAULT_DISTANCE)
|
|
67
68
|
# Support both positional and keyword argument styles
|
|
68
69
|
# If called with Delta.new(3), distance_arg=3, distance=DEFAULT (keyword not used)
|
|
69
70
|
# If called with Delta.new(distance: 3), distance_arg=DEFAULT, distance=3
|
|
@@ -44,13 +44,13 @@ module Omnizip
|
|
|
44
44
|
# @return [void]
|
|
45
45
|
def self.register_bcj_filters
|
|
46
46
|
# Individual BCJ architecture filters (use hyphens to match existing convention)
|
|
47
|
-
register_bcj_filter(:
|
|
48
|
-
register_bcj_filter(:
|
|
49
|
-
register_bcj_filter(:
|
|
47
|
+
register_bcj_filter(:"bcj-x86", BcjX86, architecture: :x86)
|
|
48
|
+
register_bcj_filter(:"bcj-arm", BcjArm, architecture: :arm)
|
|
49
|
+
register_bcj_filter(:"bcj-arm64", BcjArm64, architecture: :arm64,
|
|
50
50
|
xz_supported: false)
|
|
51
|
-
register_bcj_filter(:
|
|
52
|
-
register_bcj_filter(:
|
|
53
|
-
register_bcj_filter(:
|
|
51
|
+
register_bcj_filter(:"bcj-ia64", BcjIa64, architecture: :ia64)
|
|
52
|
+
register_bcj_filter(:"bcj-ppc", BcjPpc, architecture: :powerpc)
|
|
53
|
+
register_bcj_filter(:"bcj-sparc", BcjSparc, architecture: :sparc)
|
|
54
54
|
|
|
55
55
|
# Unified BCJ filter (Task 2) - supports all architectures
|
|
56
56
|
# Note: We register it as 'bcj' without architecture suffix
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Formats
|
|
5
|
+
module Cpio
|
|
6
|
+
# IO wrapper that limits reading to a specific byte count
#
# Used to read file content from CPIO archives where the file size
# is known in advance but the underlying IO stream continues.
# Reads never consume more than `length` bytes of the underlying
# stream in total.
class BoundedIO
  attr_reader :length, :remaining

  # Initialize bounded IO
  #
  # @param io [IO] Underlying IO stream
  # @param length [Integer] Maximum bytes to read
  # @yield Block called when EOF is reached (for reading padding)
  def initialize(io, length, &eof_callback)
    @io = io
    @length = length
    @remaining = length
    @eof_callback = eof_callback
    @eof = false
  end

  # Read bytes from the IO
  #
  # Requests larger than the bytes left in the bounded region are
  # clamped, so data following the region is never consumed.
  #
  # @param size [Integer, nil] Number of bytes to read (nil = remaining)
  # @return [String, nil] Data read, or nil at EOF (of the bounded
  #   region or of the underlying stream)
  def read(size = nil)
    return nil if eof?

    size = size.nil? ? @remaining : [size, @remaining].min
    data = @io.read(size)
    return nil if data.nil?

    @remaining -= data.bytesize
    # Fire the EOF callback as soon as the region is exhausted
    eof?
    data
  end

  # System read (raises on EOF)
  #
  # @param size [Integer] Number of bytes to read
  # @return [String] Data read
  # @raise [EOFError] If at end of bounded region, or if the underlying
  #   stream ends before the region does
  def sysread(size)
    raise EOFError, "end of file reached" if eof?

    read(size) || raise(EOFError, "end of file reached")
  end

  # Check if at end of bounded region
  #
  # The first call that finds the region exhausted invokes the EOF
  # callback (once) before reporting true.
  #
  # @return [Boolean] True if no more bytes to read
  def eof?
    return false if @remaining.positive?
    return true if @eof

    @eof_callback&.call
    @eof = true
  end
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
data/lib/omnizip/formats/lzip.rb
CHANGED
|
@@ -140,7 +140,8 @@ module Omnizip
|
|
|
140
140
|
# @return [Hash] Metadata (version, dict_size, member_size)
|
|
141
141
|
def decompress_stream(input_io, output_io, options = {})
|
|
142
142
|
require_relative "../algorithms/lzma/lzip_decoder"
|
|
143
|
-
decoder = Omnizip::Algorithms::LZMA::LzipDecoder.new(input_io,
|
|
143
|
+
decoder = Omnizip::Algorithms::LZMA::LzipDecoder.new(input_io,
|
|
144
|
+
options)
|
|
144
145
|
result = decoder.decode_stream
|
|
145
146
|
|
|
146
147
|
output_io.write(result)
|
|
@@ -143,7 +143,8 @@ module Omnizip
|
|
|
143
143
|
# @return [Hash] Metadata (lc, lp, pb, dict_size, uncompressed_size)
|
|
144
144
|
def decompress_stream(input_io, output_io, options = {})
|
|
145
145
|
require_relative "../algorithms/lzma/lzma_alone_decoder"
|
|
146
|
-
decoder = Omnizip::Algorithms::LZMA::LzmaAloneDecoder.new(input_io,
|
|
146
|
+
decoder = Omnizip::Algorithms::LZMA::LzmaAloneDecoder.new(input_io,
|
|
147
|
+
options)
|
|
147
148
|
result = decoder.decode_stream
|
|
148
149
|
|
|
149
150
|
output_io.write(result)
|