omnizip 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +243 -368
- data/README.adoc +101 -5
- data/docs/guides/archive-formats/index.adoc +31 -1
- data/docs/guides/archive-formats/ole-format.adoc +316 -0
- data/docs/guides/archive-formats/rpm-format.adoc +249 -0
- data/docs/index.adoc +12 -2
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +29 -18
- data/lib/omnizip/algorithms/lzma/encoder.rb +2 -1
- data/lib/omnizip/algorithms/lzma/length_coder.rb +6 -3
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +2 -1
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +40 -13
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +36 -2
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +19 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +2 -1
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +148 -112
- data/lib/omnizip/algorithms/lzma.rb +20 -5
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +25 -21
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +4 -11
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +2 -1
- data/lib/omnizip/algorithms/xz_lzma2.rb +2 -1
- data/lib/omnizip/algorithms/zstandard/constants.rb +125 -9
- data/lib/omnizip/algorithms/zstandard/decoder.rb +202 -17
- data/lib/omnizip/algorithms/zstandard/encoder.rb +197 -17
- data/lib/omnizip/algorithms/zstandard/frame/block.rb +128 -0
- data/lib/omnizip/algorithms/zstandard/frame/header.rb +224 -0
- data/lib/omnizip/algorithms/zstandard/fse/bitstream.rb +186 -0
- data/lib/omnizip/algorithms/zstandard/fse/encoder.rb +325 -0
- data/lib/omnizip/algorithms/zstandard/fse/table.rb +269 -0
- data/lib/omnizip/algorithms/zstandard/huffman.rb +272 -0
- data/lib/omnizip/algorithms/zstandard/huffman_encoder.rb +339 -0
- data/lib/omnizip/algorithms/zstandard/literals.rb +178 -0
- data/lib/omnizip/algorithms/zstandard/literals_encoder.rb +251 -0
- data/lib/omnizip/algorithms/zstandard/sequences.rb +346 -0
- data/lib/omnizip/buffer/memory_extractor.rb +3 -3
- data/lib/omnizip/buffer.rb +2 -2
- data/lib/omnizip/filters/delta.rb +2 -1
- data/lib/omnizip/filters/registry.rb +6 -6
- data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
- data/lib/omnizip/formats/lzip.rb +2 -1
- data/lib/omnizip/formats/lzma_alone.rb +2 -1
- data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
- data/lib/omnizip/formats/ole/constants.rb +61 -0
- data/lib/omnizip/formats/ole/dirent.rb +380 -0
- data/lib/omnizip/formats/ole/header.rb +198 -0
- data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
- data/lib/omnizip/formats/ole/storage.rb +305 -0
- data/lib/omnizip/formats/ole/types/variant.rb +328 -0
- data/lib/omnizip/formats/ole.rb +145 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +92 -49
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +13 -20
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +6 -2
- data/lib/omnizip/formats/rar3/reader.rb +6 -2
- data/lib/omnizip/formats/rar5/reader.rb +4 -1
- data/lib/omnizip/formats/rpm/constants.rb +58 -0
- data/lib/omnizip/formats/rpm/entry.rb +102 -0
- data/lib/omnizip/formats/rpm/header.rb +113 -0
- data/lib/omnizip/formats/rpm/lead.rb +122 -0
- data/lib/omnizip/formats/rpm/tag.rb +230 -0
- data/lib/omnizip/formats/rpm.rb +434 -0
- data/lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb +239 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +32 -8
- data/lib/omnizip/formats/seven_zip/constants.rb +1 -1
- data/lib/omnizip/formats/seven_zip/reader.rb +84 -8
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +2 -1
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +6 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +21 -9
- data/lib/omnizip/formats/seven_zip.rb +10 -0
- data/lib/omnizip/formats/xar/entry.rb +18 -5
- data/lib/omnizip/formats/xar/header.rb +34 -6
- data/lib/omnizip/formats/xar/reader.rb +43 -10
- data/lib/omnizip/formats/xar/toc.rb +34 -21
- data/lib/omnizip/formats/xar/writer.rb +15 -5
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +45 -33
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +2 -1
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +3 -1
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +2 -1
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +4 -3
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +14 -6
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +2 -1
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +28 -13
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +13 -6
- data/lib/omnizip/pipe/stream_compressor.rb +1 -1
- data/lib/omnizip/version.rb +1 -1
- data/readme-docs/compression-algorithms.adoc +6 -2
- metadata +30 -2
|
@@ -0,0 +1,434 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "stringio"
|
|
4
|
+
require "tempfile"
|
|
5
|
+
require_relative "rpm/constants"
|
|
6
|
+
require_relative "rpm/lead"
|
|
7
|
+
require_relative "rpm/header"
|
|
8
|
+
require_relative "rpm/entry"
|
|
9
|
+
require_relative "cpio"
|
|
10
|
+
|
|
11
|
+
module Omnizip
|
|
12
|
+
module Formats
|
|
13
|
+
# RPM package format support
|
|
14
|
+
#
|
|
15
|
+
# Provides read access to RPM packages, extracting metadata
|
|
16
|
+
# and file contents from the payload.
|
|
17
|
+
#
|
|
18
|
+
# @example Open RPM and list files
|
|
19
|
+
# Omnizip::Formats::Rpm.open('package.rpm') do |rpm|
|
|
20
|
+
# rpm.files.each { |f| puts f }
|
|
21
|
+
# end
|
|
22
|
+
#
|
|
23
|
+
# @example Extract RPM contents
|
|
24
|
+
# Omnizip::Formats::Rpm.extract('package.rpm', 'output/')
|
|
25
|
+
module Rpm
|
|
26
|
+
class << self
|
|
27
|
+
# Open an RPM package for reading.
#
# Builds a Reader for +path+ and opens it. With a block, the reader is
# yielded and closed once the block finishes (also when it raises), and
# the block's value is returned. Without a block the open reader itself
# is returned and the caller has to close it.
#
# @param path [String] Path to RPM file
# @yield [reader] Block for reading package
# @yieldparam reader [Reader] RPM reader
# @return [Reader, Object] the open reader, or the block's result
def open(path)
  reader = Reader.new(path)
  reader.open
  return reader unless block_given?

  begin
    yield reader
  ensure
    reader.close
  end
end
|
|
47
|
+
|
|
48
|
+
# List the file paths recorded in an RPM.
#
# Opens the package in block form, so the reader is closed before
# this method returns.
#
# @param path [String] Path to RPM file
# @return [Array<String>] File paths
def list(path)
  self.open(path) { |rpm| rpm.files }
end
|
|
55
|
+
|
|
56
|
+
# Extract the contents of an RPM into a directory.
#
# Opens the package in block form and delegates to Reader#extract.
#
# @param rpm_path [String] Path to RPM file
# @param output_dir [String] Output directory
def extract(rpm_path, output_dir)
  self.open(rpm_path) { |rpm| rpm.extract(output_dir) }
end
|
|
65
|
+
|
|
66
|
+
# Collect a summary of the package metadata.
#
# The result has the keys :name, :version, :release, :epoch, :arch,
# :summary, :description, :license, :vendor, :build_time and
# :file_count, in that order.
#
# @param path [String] Path to RPM file
# @return [Hash] Package information
def info(path)
  self.open(path) do |rpm|
    details = {}
    %i[name version release epoch].each { |key| details[key] = rpm.public_send(key) }
    details[:arch] = rpm.architecture
    %i[summary description license vendor build_time].each do |key|
      details[key] = rpm.public_send(key)
    end
    details[:file_count] = rpm.files.size
    details
  end
end
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
# RPM package reader
|
|
90
|
+
#
|
|
91
|
+
# Handles parsing and extraction of RPM packages.
|
|
92
|
+
class Reader
|
|
93
|
+
include Constants
|
|
94
|
+
|
|
95
|
+
# @return [String] File path
|
|
96
|
+
attr_reader :path
|
|
97
|
+
|
|
98
|
+
# @return [Lead] Parsed lead
|
|
99
|
+
attr_reader :lead
|
|
100
|
+
|
|
101
|
+
# @return [Header, nil] Signature header
|
|
102
|
+
attr_reader :signature
|
|
103
|
+
|
|
104
|
+
# @return [Header] Main header
|
|
105
|
+
attr_reader :header
|
|
106
|
+
|
|
107
|
+
# Create a reader for the package at +path+.
#
# Nothing is opened or parsed here; the file handle, lead, headers and
# the tag cache all start out as nil.
#
# @param path [String] Path to RPM file
def initialize(path)
  @path = path
  @file = @lead = @signature = @header = @tags = nil
end
|
|
118
|
+
|
|
119
|
+
# Open the package file in binary mode and parse it.
#
# If parsing fails, the freshly opened handle is closed again and the
# reader is left in its unopened state before the error propagates, so
# a malformed package does not leak a file descriptor.
#
# @return [self]
# @raise [SystemCallError] when the file cannot be opened
# @raise [StandardError] whatever parse! raises, re-raised unchanged
def open
  @file = File.open(@path, "rb")
  begin
    parse!
  rescue StandardError
    @file.close
    @file = nil
    raise
  end
  self
end
|
|
127
|
+
|
|
128
|
+
# Close the underlying file handle, if any, and forget it.
#
# Calling this on a reader that is not open does nothing.
#
# @return [nil]
def close
  @file.close if @file
  @file = nil
end
|
|
133
|
+
|
|
134
|
+
# All tags of the main header as a hash.
#
# The hash is built from the main header on first use and cached.
#
# @return [Hash] Tag names to values
def tags
  @tags ||= @header.to_h
end
|
|
142
|
+
|
|
143
|
+
# Package name, read from the :name tag.
#
# @return [String]
def name
  tags[:name]
end
|
|
149
|
+
|
|
150
|
+
# Package version, read from the :version tag.
#
# @return [String]
def version
  tags[:version]
end
|
|
156
|
+
|
|
157
|
+
# Package release, read from the :release tag.
#
# @return [String]
def release
  tags[:release]
end
|
|
163
|
+
|
|
164
|
+
# Package epoch.
#
# Prefers the :epochnum tag; otherwise falls back to the first element
# of the :epoch tag. Returns nil when neither is present.
#
# @return [Integer, nil]
def epoch
  from_epochnum = tags[:epochnum]
  return from_epochnum if from_epochnum

  tags[:epoch]&.first
end
|
|
170
|
+
|
|
171
|
+
# Package architecture, read from the :arch tag.
#
# @return [String]
def architecture
  tags[:arch]
end
|
|
177
|
+
|
|
178
|
+
# One-line package summary, read from the :summary tag.
#
# @return [String]
def summary
  tags[:summary]
end
|
|
184
|
+
|
|
185
|
+
# Package description, read from the :description tag.
#
# @return [String]
def description
  tags[:description]
end
|
|
191
|
+
|
|
192
|
+
# Package license, read from the :license tag.
#
# @return [String]
def license
  tags[:license]
end
|
|
198
|
+
|
|
199
|
+
# Package vendor, read from the :vendor tag.
#
# @return [String]
def vendor
  tags[:vendor]
end
|
|
205
|
+
|
|
206
|
+
# Build time of the package.
#
# Uses the first element of the :buildtime tag as seconds since the
# Unix epoch; when the tag is absent the result is Time.at(0).
#
# @return [Time]
def build_time
  seconds = tags[:buildtime]&.first
  Time.at(seconds || 0)
end
|
|
212
|
+
|
|
213
|
+
# Name of the compressor used for the payload.
#
# Read from the :payloadcompressor tag; "gzip" is assumed when the tag
# is missing.
#
# @return [String] Compressor name (gzip, bzip2, xz, zstd)
def payload_compressor
  declared = tags[:payloadcompressor]
  declared || "gzip"
end
|
|
219
|
+
|
|
220
|
+
# Full paths of the files listed in the package.
#
# Each entry of the :basenames tag is joined with the directory that
# the matching :dirindexes entry selects from :dirnames. A missing
# basename, a missing directory index, or an index with no directory
# all fall back to an empty path component instead of raising, so a
# package with inconsistent tag lengths still yields a list.
#
# @return [Array<String>] File paths
def files
  basenames = tags[:basenames] || []
  dirindexes = tags[:dirindexes] || []
  dirnames = tags[:dirnames] || []

  basenames.each_with_index.map do |basename, position|
    dir_index = dirindexes[position]
    # Array#[] raises TypeError for nil, so only look up real indexes.
    dirname = dir_index.nil? ? nil : dirnames[dir_index]
    File.join(dirname || "", basename || "")
  end
end
|
|
232
|
+
|
|
233
|
+
# File entries with their per-file metadata.
#
# One Entry is built per path returned by #files. The per-file tag
# arrays are read by position; a missing value falls back to 0, an
# empty string, mode 0o100644, or Time.at(0) for the mtime.
#
# @return [Array<Entry>]
def entries
  paths = files
  column = ->(tag) { tags[tag] || [] }

  sizes = column.call(:filesizes)
  modes = column.call(:filemodes)
  uids = column.call(:fileuids)
  gids = column.call(:filegids)
  mtimes = column.call(:filemtimes)
  flags = column.call(:fileflags)
  users = column.call(:fileusername)
  groups = column.call(:filegroupname)
  digests = column.call(:filedigests)
  linktos = column.call(:filelinktos)

  paths.each_with_index.map do |path, i|
    entry = Entry.new
    entry.path = path
    entry.size = sizes[i] || 0
    entry.mode = modes[i] || 0o100_644
    entry.uid = uids[i] || 0
    entry.gid = gids[i] || 0
    entry.mtime = Time.at(mtimes[i] || 0)
    entry.flags = flags[i] || 0
    entry.user = users[i] || ""
    entry.group = groups[i] || ""
    entry.digest = digests[i] || ""
    entry.link_to = linktos[i] || ""
    entry
  end
end
|
|
265
|
+
|
|
266
|
+
# Dependencies the package requires.
#
# Built from the "require"-prefixed name/flags/version tags.
#
# @return [Array<Array>] [name, operator, version]
def requires
  build_relations(:require)
end
|
|
272
|
+
|
|
273
|
+
# Capabilities the package provides.
#
# Built from the "provide"-prefixed name/flags/version tags.
#
# @return [Array<Array>] [name, operator, version]
def provides
  build_relations(:provide)
end
|
|
279
|
+
|
|
280
|
+
# Packages this package conflicts with.
#
# Built from the "conflict"-prefixed name/flags/version tags.
#
# @return [Array<Array>] [name, operator, version]
def conflicts
  build_relations(:conflict)
end
|
|
286
|
+
|
|
287
|
+
# Extract the package payload into a directory.
#
# Creates +output_dir+ if needed, obtains a handle positioned at the
# payload, wraps it in the matching decompressor and unpacks the
# resulting CPIO stream. The payload handle is a duplicate of the
# reader's file handle, so it is closed here once extraction has
# finished or failed; the reader's own handle stays open.
#
# @param output_dir [String] Output directory
# @raise [RuntimeError] when the reader has not been opened
def extract(output_dir)
  raise "RPM not opened" unless @file

  # FileUtils is not required at the top of this file, so load it here
  # rather than relying on another library having pulled it in.
  require "fileutils"
  FileUtils.mkdir_p(output_dir)

  payload_io = payload
  begin
    extract_cpio(create_decompressor(payload_io), output_dir)
  ensure
    payload_io.close unless payload_io.closed?
  end
end
|
|
304
|
+
|
|
305
|
+
private
|
|
306
|
+
|
|
307
|
+
def parse!
  # Reads, in order: the lead, the optional signature header plus its
  # alignment padding, and the main header. Leaves @file positioned
  # right after the main header.
  @lead = Lead.parse(@file)

  # A signature header is only present for the signed lead type.
  if @lead.signature_type == HEADER_SIGNED_TYPE
    @signature = Header.parse(@file)

    # The signature is padded up to the next multiple of 8 bytes, so the
    # number of bytes to skip is the distance to that boundary (0..7),
    # not the remainder itself.
    # NOTE(review): assumes Header#length is the signature's total size
    # in bytes - confirm against Header.
    padding = (8 - (@signature.length % 8)) % 8
    @file.read(padding) if padding.positive?
  end

  @header = Header.parse(@file)
end
|
|
323
|
+
|
|
324
|
+
def build_relations(type)
  # Looks up the "<type>name", "<type>flags" and "<type>version" tags
  # and combines them position by position into
  # [name, operator, version] triples. Any missing tag yields [].
  names, flags, versions = %w[name flags version].map { |suffix| tags[:"#{type}#{suffix}"] }
  return [] if !names || !flags || !versions

  names.each_with_index.map do |name, i|
    [name, operator_from_flag(flags[i]), versions[i]]
  end
end
|
|
335
|
+
|
|
336
|
+
def operator_from_flag(flag)
  # Translates a dependency flag bitmask into a comparison operator
  # string: ">=", "<=", ">", "<", "=", or "" when no comparison bit is
  # set. A nil flag (a relation whose flags array is shorter than its
  # names array) is treated as "no comparison" instead of raising.
  # FLAG_LESS / FLAG_GREATER / FLAG_EQUAL are not defined in this file;
  # presumably they come from the included Constants module.
  return "" if flag.nil?

  less = flag.anybits?(FLAG_LESS)
  greater = flag.anybits?(FLAG_GREATER)
  equal = flag.anybits?(FLAG_EQUAL)

  # Combined operators are checked first, and "greater" before "less",
  # so a mask with several bits set resolves deterministically.
  return ">=" if greater && equal
  return "<=" if less && equal
  return ">" if greater
  return "<" if less
  return "=" if equal

  ""
end
|
|
345
|
+
|
|
346
|
+
def payload
  # Returns a duplicate of the open file handle positioned at the start
  # of the payload. The caller owns the duplicate and should close it.
  raise "RPM not opened" unless @file

  # The payload follows the lead, the optional signature header (padded
  # up to the next multiple of 8 bytes) and the main header.
  # NOTE(review): assumes #length on each part is its size in bytes.
  offset = @lead.length
  if @signature
    signature_size = @signature.length
    alignment = (8 - (signature_size % 8)) % 8
    offset += signature_size + alignment
  end
  offset += @header.length

  payload_file = @file.dup
  payload_file.seek(offset)
  payload_file
end
|
|
360
|
+
|
|
361
|
+
def create_decompressor(io)
  # Picks the decompression path from #payload_compressor. Depending on
  # the branch the result is a readable stream (gzip, unknown) or
  # whatever the bzip2 / xz / zstd helper returns; extract_cpio accepts
  # both a String and a readable object.
  case payload_compressor
  when "gzip"
    require "zlib"
    Zlib::GzipReader.new(io)
  when "bzip2"
    require_relative "../algorithms/bzip2/decompressor"
    # The bzip2 decompressor is handed the complete payload at once.
    compressed = io.read
    Omnizip::Algorithms::Bzip2::Decompressor.new.decompress(compressed)
  when "xz", "lzma"
    decompress_xz(io)
  when "zstd"
    decompress_zstd(io)
  else
    # Unrecognised compressor name: hand the raw stream through.
    io
  end
end
|
|
382
|
+
|
|
383
|
+
def decompress_xz(io)
  # Loads the XZ format support on first use, then decodes the whole
  # payload in one call.
  require_relative "xz"
  compressed = io.read
  Xz.decode(compressed)
end
|
|
387
|
+
|
|
388
|
+
# Decompress a zstd payload by piping it through the system `zstd`
# command ("zstd -d -c"). The external tool is used as a fallback
# because the pure Ruby decoder has incomplete FSE table support.
def decompress_zstd(io)
  decompress_with_command("zstd", "-d", "-c", io.read)
end
|
|
396
|
+
|
|
397
|
+
# Decompress data by piping it through an external command.
#
# +data+ is written to the command's standard input and its standard
# output is returned as a binary string. Open3.capture3 feeds stdin and
# drains stdout/stderr concurrently, so large payloads cannot stall on a
# full pipe, and the exit status is checked so that a failed run is
# reported instead of returning partial or empty output.
#
# @param cmd [String] executable to run
# @param args [Array<String>] arguments passed to the executable
# @param data [String] bytes written to the command's stdin
# @return [String] bytes the command wrote to stdout
# @raise [RuntimeError] when the command cannot be started or exits
#   unsuccessfully
def decompress_with_command(cmd, *args, data)
  require "open3"

  begin
    output, error_output, status = Open3.capture3(cmd, *args, stdin_data: data, binmode: true)
  rescue StandardError => e
    raise "Failed to decompress with #{cmd}: #{e.message}"
  end

  unless status.success?
    reason = error_output.to_s.strip
    reason = "exit status #{status.exitstatus.inspect}" if reason.empty?
    raise "Failed to decompress with #{cmd}: #{reason}"
  end

  output
end
|
|
415
|
+
|
|
416
|
+
def extract_cpio(source, output_dir)
  # Spools the decompressed CPIO data into a temporary file (removed
  # when the block ends) because Cpio.extract is given a path.
  # +source+ may be the data itself or an object responding to #read.
  Tempfile.create(["rpm_payload", ".cpio"]) do |spool|
    spool.binmode
    cpio_bytes = source.is_a?(String) ? source : source.read
    spool.write(cpio_bytes)
    spool.flush

    Cpio.extract(spool.path, output_dir)
  end
end
|
|
431
|
+
end
|
|
432
|
+
end
|
|
433
|
+
end
|
|
434
|
+
end
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "coder_chain"
|
|
4
|
+
require_relative "../../filters/bcj2/stream_data"
|
|
5
|
+
require_relative "../../filters/bcj2/decoder"
|
|
6
|
+
require "stringio"
|
|
7
|
+
|
|
8
|
+
module Omnizip
|
|
9
|
+
module Formats
|
|
10
|
+
module SevenZip
|
|
11
|
+
# Decompresses BCJ2 multi-stream folders
|
|
12
|
+
#
|
|
13
|
+
# BCJ2 archives have 4 separate pack streams that need to be:
|
|
14
|
+
# 1. Read separately
|
|
15
|
+
# 2. Decompressed individually (main is LZMA, others are usually COPY)
|
|
16
|
+
# 3. Combined via BCJ2 decoder
|
|
17
|
+
class Bcj2StreamDecompressor
|
|
18
|
+
include Constants
|
|
19
|
+
|
|
20
|
+
# Set up a decompressor for one BCJ2 folder.
#
# The arguments are only stored; no I/O happens until #decompress.
#
# @param archive_io [IO] Archive file handle
# @param folder [Models::Folder] Folder with BCJ2 coder
# @param pack_pos [Integer] Base position of packed data
# @param pack_sizes [Array<Integer>] Sizes of each pack stream
# @param stream_info [Models::StreamInfo] Stream info for unpack sizes
def initialize(archive_io, folder, pack_pos, pack_sizes, stream_info)
  @archive_io, @folder = archive_io, folder
  @pack_pos, @pack_sizes = pack_pos, pack_sizes
  @stream_info = stream_info
end
|
|
34
|
+
|
|
35
|
+
# Tell whether any coder of the folder uses the BCJ2 filter.
#
# @param folder [Models::Folder] Folder to check
# @return [Boolean] true if folder has BCJ2
def self.bcj2_folder?(folder)
  method_ids = folder.coders.map(&:method_id)
  method_ids.include?(FilterId::BCJ2)
end
|
|
42
|
+
|
|
43
|
+
# Decompress a BCJ2 multi-stream folder.
#
# Locates the BCJ2 coder and the compression coder, works out the
# stream layout, reads the four input streams (main, call, jump, rc)
# and feeds them to the BCJ2 decoder. Output longer than
# +expected_size+ bytes is cut to that many bytes.
#
# @param expected_size [Integer] Expected final output size
# @return [String] Decompressed and BCJ2-decoded data
# @raise [RuntimeError] when the folder has no BCJ2 coder
def decompress(expected_size)
  bcj2_coder = @folder.coders.find { |c| c.method_id == FilterId::BCJ2 }
  raise "BCJ2 coder not found" unless bcj2_coder

  compression_coder = find_compression_coder
  stream_layout = determine_stream_layout(bcj2_coder, compression_coder)
  streams = read_bcj2_streams(stream_layout)

  bcj2_data = Omnizip::Filters::Bcj2StreamData.new
  bcj2_data.main = streams[:main]
  bcj2_data.call = streams[:call]
  bcj2_data.jump = streams[:jump]
  bcj2_data.rc = streams[:rc]

  result = Omnizip::Filters::Bcj2Decoder.new(bcj2_data).decode

  # The size check is in bytes, so the cut must be in bytes as well;
  # String#[] would count characters if the result is not binary.
  result.bytesize > expected_size ? result.byteslice(0, expected_size) : result
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
|
|
77
|
+
# Find the first coder in the folder whose method is one of the
# compression methods (LZMA, LZMA2, BZIP2, DEFLATE, DEFLATE64, PPMD).
#
# @return [Models::CoderInfo, nil] Compression coder
def find_compression_coder
  compression_methods = [
    MethodId::LZMA, MethodId::LZMA2, MethodId::BZIP2,
    MethodId::DEFLATE, MethodId::DEFLATE64, MethodId::PPMD
  ]
  @folder.coders.find { |coder| compression_methods.include?(coder.method_id) }
end
|
|
86
|
+
|
|
87
|
+
# Determine how the four BCJ2 input streams map onto pack streams.
#
# The layout is chosen from the folder's stream counts:
#   Type 0 (7z default): 5 in streams, 2 out streams, 4 pack streams
#     - Coder 0: LZMA (1 in, 1 out)
#     - Coder 1: BCJ2 (4 in, 1 out)
#     - Pack streams: [main_lzma, call, jump, rc]
#   Type 1 (7zr style): 7 in streams, 4 out streams, 4 pack streams,
#     delegated to #determine_type1_layout.
#
# @param _bcj2_coder [Models::CoderInfo] BCJ2 coder (currently unused)
# @param compression_coder [Models::CoderInfo] Compression coder
# @return [Hash] Stream layout specification
# @raise [RuntimeError] for any other combination of stream counts
def determine_stream_layout(_bcj2_coder, compression_coder)
  counts = [@folder.num_in_streams, @folder.num_out_streams, @pack_sizes.size]

  case counts
  when [5, 2, 4]
    # Only the main stream goes through the compression coder; the
    # other three are read as-is (usually COPY).
    {
      type: :type0,
      main: { pack_idx: 0, coder: compression_coder },
      call: { pack_idx: 1, coder: nil },
      jump: { pack_idx: 2, coder: nil },
      rc: { pack_idx: 3, coder: nil },
    }
  when [7, 4, 4]
    determine_type1_layout(compression_coder)
  else
    num_in, num_out, num_pack = counts
    raise "Unsupported BCJ2 folder layout: in=#{num_in}, out=#{num_out}, pack=#{num_pack}"
  end
end
|
|
120
|
+
|
|
121
|
+
# Build the layout for Type 1 folders (7zr style).
#
# In this layout each stream may have its own compression, which
# would need bind pair analysis to resolve. That analysis is not done
# here: the main stream is assumed to use +compression_coder+ and the
# call, jump and rc streams are assumed to be stored uncompressed.
#
# @param compression_coder [Models::CoderInfo] Compression coder
# @return [Hash] Stream layout
def determine_type1_layout(compression_coder)
  layout = { type: :type1, main: { pack_idx: 0, coder: compression_coder } }
  %i[call jump rc].each_with_index do |stream, position|
    layout[stream] = { pack_idx: position + 1, coder: nil }
  end
  layout
end
|
|
137
|
+
|
|
138
|
+
# Read the four BCJ2 input streams and decompress those that have a
# coder assigned.
#
# Each stream's position is derived from its own :pack_idx (the sum of
# the sizes of all pack streams before it), so size and offset always
# refer to the same pack stream regardless of the order in which the
# layout assigns indexes. A read that returns nil (nothing left in the
# archive) is treated as empty binary data for every stream.
#
# @param layout [Hash] Stream layout specification
# @return [Hash] Decompressed stream data keyed by :main, :call, :jump, :rc
def read_bcj2_streams(layout)
  streams = {}

  %i[main call jump rc].each_with_index do |stream_name, idx|
    spec = layout[stream_name]
    pack_idx = spec[:pack_idx]
    pack_size = @pack_sizes[pack_idx] || 0

    # Pack streams are stored back to back starting at @pack_pos.
    preceding_bytes = @pack_sizes.first(pack_idx).sum { |size| size || 0 }
    @archive_io.seek(@pack_pos + preceding_bytes)
    packed_data = @archive_io.read(pack_size) || "".b

    coder = spec[:coder]
    # idx (position in main/call/jump/rc order) is what decompress_stream
    # uses to look up the unpack size.
    streams[stream_name] = coder ? decompress_stream(packed_data, coder, idx) : packed_data
  end

  streams
end
|
|
171
|
+
|
|
172
|
+
# Decompress one pack stream with the algorithm its coder names.
#
# The data is returned unchanged when there is no coder or when the
# coder's method id maps to no algorithm. The output size passed to the
# decoder is the folder's unpack size at +stream_idx+, or ten times the
# packed size when the folder has no entry there.
#
# @param packed_data [String] Compressed data
# @param coder [Models::CoderInfo] Coder specification
# @param stream_idx [Integer] Stream index for unpack size lookup
# @return [String] Decompressed data
# @raise [RuntimeError] when the algorithm is not registered
def decompress_stream(packed_data, coder, stream_idx)
  return packed_data if coder.nil?

  algorithm = CoderChain.algorithm_for_method(coder.method_id)
  return packed_data unless algorithm

  implementation = AlgorithmRegistry.get(algorithm)
  raise "Algorithm not found: #{algorithm}" unless implementation

  codec = implementation.new(build_decoder_options(coder, algorithm))

  sink = StringIO.new
  sink.set_encoding("BINARY")

  size_limit = @folder.unpack_sizes[stream_idx] || (packed_data.bytesize * 10)
  codec.decompress(StringIO.new(packed_data), sink, size: size_limit)

  sink.string
end
|
|
203
|
+
|
|
204
|
+
# Translate a coder's raw property bytes into decoder options.
#
# - :lzma2 - the first byte is decoded into a dictionary size; the
#   result carries :raw_mode and :dict_size.
# - :lzma - needs at least 5 bytes: byte 0 packs lc/lp/pb as
#   (pb * 5 + lp) * 9 + lc, bytes 1..4 hold the dictionary size as a
#   little-endian 32-bit integer; the result carries :lzma2_mode, :lc,
#   :lp, :pb and :dict_size.
# - anything else, or missing/empty/too short properties - {}.
#
# @param coder [Models::CoderInfo] Coder with properties
# @param algo_sym [Symbol] Algorithm symbol
# @return [Hash] Decoder options
def build_decoder_options(coder, algo_sym)
  props = coder.properties
  return {} if props.nil? || props.empty?

  case algo_sym
  when :lzma2
    dict_size = Omnizip::Algorithms::LZMA2::Properties.decode_dict_size(props.getbyte(0))
    { raw_mode: true, dict_size: dict_size }
  when :lzma
    return {} if props.bytesize < 5

    pb_and_lp, lc = props.getbyte(0).divmod(9)
    pb, lp = pb_and_lp.divmod(5)
    {
      lzma2_mode: true,
      lc: lc,
      lp: lp,
      pb: pb,
      dict_size: props[1, 4].unpack1("V"),
    }
  else
    {}
  end
end
|
|
236
|
+
end
|
|
237
|
+
end
|
|
238
|
+
end
|
|
239
|
+
end
|