omnizip 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +243 -368
- data/README.adoc +101 -5
- data/docs/guides/archive-formats/index.adoc +31 -1
- data/docs/guides/archive-formats/ole-format.adoc +316 -0
- data/docs/guides/archive-formats/rpm-format.adoc +249 -0
- data/docs/index.adoc +12 -2
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +29 -18
- data/lib/omnizip/algorithms/lzma/encoder.rb +2 -1
- data/lib/omnizip/algorithms/lzma/length_coder.rb +6 -3
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +2 -1
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +40 -13
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +36 -2
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +19 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +2 -1
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +148 -112
- data/lib/omnizip/algorithms/lzma.rb +20 -5
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +25 -21
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +4 -11
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +2 -1
- data/lib/omnizip/algorithms/xz_lzma2.rb +2 -1
- data/lib/omnizip/algorithms/zstandard/constants.rb +125 -9
- data/lib/omnizip/algorithms/zstandard/decoder.rb +202 -17
- data/lib/omnizip/algorithms/zstandard/encoder.rb +197 -17
- data/lib/omnizip/algorithms/zstandard/frame/block.rb +128 -0
- data/lib/omnizip/algorithms/zstandard/frame/header.rb +224 -0
- data/lib/omnizip/algorithms/zstandard/fse/bitstream.rb +186 -0
- data/lib/omnizip/algorithms/zstandard/fse/encoder.rb +325 -0
- data/lib/omnizip/algorithms/zstandard/fse/table.rb +269 -0
- data/lib/omnizip/algorithms/zstandard/huffman.rb +272 -0
- data/lib/omnizip/algorithms/zstandard/huffman_encoder.rb +339 -0
- data/lib/omnizip/algorithms/zstandard/literals.rb +178 -0
- data/lib/omnizip/algorithms/zstandard/literals_encoder.rb +251 -0
- data/lib/omnizip/algorithms/zstandard/sequences.rb +346 -0
- data/lib/omnizip/buffer/memory_extractor.rb +3 -3
- data/lib/omnizip/buffer.rb +2 -2
- data/lib/omnizip/filters/delta.rb +2 -1
- data/lib/omnizip/filters/registry.rb +6 -6
- data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
- data/lib/omnizip/formats/lzip.rb +2 -1
- data/lib/omnizip/formats/lzma_alone.rb +2 -1
- data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
- data/lib/omnizip/formats/ole/constants.rb +61 -0
- data/lib/omnizip/formats/ole/dirent.rb +380 -0
- data/lib/omnizip/formats/ole/header.rb +198 -0
- data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
- data/lib/omnizip/formats/ole/storage.rb +305 -0
- data/lib/omnizip/formats/ole/types/variant.rb +328 -0
- data/lib/omnizip/formats/ole.rb +145 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +92 -49
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +13 -20
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +6 -2
- data/lib/omnizip/formats/rar3/reader.rb +6 -2
- data/lib/omnizip/formats/rar5/reader.rb +4 -1
- data/lib/omnizip/formats/rpm/constants.rb +58 -0
- data/lib/omnizip/formats/rpm/entry.rb +102 -0
- data/lib/omnizip/formats/rpm/header.rb +113 -0
- data/lib/omnizip/formats/rpm/lead.rb +122 -0
- data/lib/omnizip/formats/rpm/tag.rb +230 -0
- data/lib/omnizip/formats/rpm.rb +434 -0
- data/lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb +239 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +32 -8
- data/lib/omnizip/formats/seven_zip/constants.rb +1 -1
- data/lib/omnizip/formats/seven_zip/reader.rb +84 -8
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +2 -1
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +6 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +21 -9
- data/lib/omnizip/formats/seven_zip.rb +10 -0
- data/lib/omnizip/formats/xar/entry.rb +18 -5
- data/lib/omnizip/formats/xar/header.rb +34 -6
- data/lib/omnizip/formats/xar/reader.rb +43 -10
- data/lib/omnizip/formats/xar/toc.rb +34 -21
- data/lib/omnizip/formats/xar/writer.rb +15 -5
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +45 -33
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +2 -1
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +3 -1
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +2 -1
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +4 -3
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +14 -6
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +2 -1
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +28 -13
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +13 -6
- data/lib/omnizip/pipe/stream_compressor.rb +1 -1
- data/lib/omnizip/version.rb +1 -1
- data/readme-docs/compression-algorithms.adoc +6 -2
- metadata +30 -2
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: RPM Format
|
|
3
|
+
nav_order: 9
|
|
4
|
+
parent: Archive Formats
|
|
5
|
+
grand_parent: Guides
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
[[rpm-format]]
|
|
9
|
+
== Purpose
|
|
10
|
+
|
|
11
|
+
RPM (RPM Package Manager, originally Red Hat Package Manager) is the standard package format for Red Hat-based Linux distributions (RHEL, CentOS, Fedora). An RPM package bundles software binaries, metadata, and scripts into a single file whose payload is a CPIO archive compressed with gzip, bzip2, xz, or zstd.
|
|
12
|
+
|
|
13
|
+
== Key Characteristics
|
|
14
|
+
|
|
15
|
+
[cols="1,3"]
|
|
16
|
+
|===
|
|
17
|
+
|Property |Value
|
|
18
|
+
|
|
19
|
+
|Compression
|
|
20
|
+
|gzip, bzip2, xz, or zstd
|
|
21
|
+
|
|
22
|
+
|Encryption
|
|
23
|
+
|None (packages can be GPG-signed for authenticity; signing is not encryption)
|
|
24
|
+
|
|
25
|
+
|Archive Support
|
|
26
|
+
|Single package with CPIO payload
|
|
27
|
+
|
|
28
|
+
|Large Files
|
|
29
|
+
|Limited by CPIO and compression
|
|
30
|
+
|
|
31
|
+
|Best For
|
|
32
|
+
|Linux software distribution, RPM-based systems
|
|
33
|
+
|===
|
|
34
|
+
|
|
35
|
+
== Basic Usage
|
|
36
|
+
|
|
37
|
+
=== Read RPM Package Metadata
|
|
38
|
+
|
|
39
|
+
[source,ruby]
|
|
40
|
+
----
|
|
41
|
+
# Open and read RPM metadata
|
|
42
|
+
rpm = Omnizip::Rpm.open('package.rpm')
|
|
43
|
+
|
|
44
|
+
puts "Name: #{rpm.name}"
|
|
45
|
+
puts "Version: #{rpm.version}"
|
|
46
|
+
puts "Release: #{rpm.release}"
|
|
47
|
+
puts "Architecture: #{rpm.arch}"
|
|
48
|
+
puts "Summary: #{rpm.summary}"
|
|
49
|
+
puts "Description: #{rpm.description}"
|
|
50
|
+
|
|
51
|
+
# List package contents
|
|
52
|
+
rpm.each_entry do |entry|
|
|
53
|
+
puts "#{entry.path} (#{entry.size} bytes)"
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
rpm.close
|
|
57
|
+
----
|
|
58
|
+
|
|
59
|
+
=== Extract RPM Package
|
|
60
|
+
|
|
61
|
+
[source,ruby]
|
|
62
|
+
----
|
|
63
|
+
# Extract all files from RPM
|
|
64
|
+
Omnizip::Rpm.extract('package.rpm', '/output/directory/')
|
|
65
|
+
|
|
66
|
+
# Extract specific files
|
|
67
|
+
Omnizip::Rpm.open('package.rpm') do |rpm|
|
|
68
|
+
rpm.extract_files(['usr/bin/app', 'usr/share/man/man1/app.1.gz'], '/output/')
|
|
69
|
+
end
|
|
70
|
+
----
|
|
71
|
+
|
|
72
|
+
=== Check RPM Signature
|
|
73
|
+
|
|
74
|
+
[source,ruby]
|
|
75
|
+
----
|
|
76
|
+
# Check whether the RPM carries a GPG signature and show its key ID
|
|
77
|
+
rpm = Omnizip::Rpm.open('package.rpm')
|
|
78
|
+
if rpm.signed?
|
|
79
|
+
puts "Package is GPG signed"
|
|
80
|
+
puts "Key ID: #{rpm.signature_key_id}"
|
|
81
|
+
end
|
|
82
|
+
----
|
|
83
|
+
|
|
84
|
+
== RPM Structure
|
|
85
|
+
|
|
86
|
+
RPM packages consist of:
|
|
87
|
+
|
|
88
|
+
. **Lead** - 96 bytes identifying the package
|
|
89
|
+
. **Signature** - Header with cryptographic signatures
|
|
90
|
+
. **Header** - Metadata (name, version, dependencies, scripts)
|
|
91
|
+
. **Payload** - CPIO archive compressed with gzip/bzip2/xz/zstd
|
|
92
|
+
|
|
93
|
+
[source,ruby]
|
|
94
|
+
----
|
|
95
|
+
# Internal RPM structure
|
|
96
|
+
rpm = Omnizip::Rpm.open('package.rpm')
|
|
97
|
+
|
|
98
|
+
# Access lead
|
|
99
|
+
puts rpm.lead.inspect
|
|
100
|
+
|
|
101
|
+
# Access signature
|
|
102
|
+
puts rpm.signature.inspect
|
|
103
|
+
|
|
104
|
+
# Access header (metadata)
|
|
105
|
+
puts rpm.header.inspect
|
|
106
|
+
|
|
107
|
+
# Access payload directly (compressed CPIO)
|
|
108
|
+
puts rpm.payload.inspect
|
|
109
|
+
----
|
|
110
|
+
|
|
111
|
+
== Payload Extraction
|
|
112
|
+
|
|
113
|
+
The RPM payload is a CPIO archive that can be extracted:
|
|
114
|
+
|
|
115
|
+
[source,ruby]
|
|
116
|
+
----
|
|
117
|
+
# Extract only the payload (files)
|
|
118
|
+
Omnizip::Rpm.open('package.rpm') do |rpm|
|
|
119
|
+
rpm.extract_payload('/output/directory/')
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Access payload entries directly
|
|
123
|
+
Omnizip::Rpm.open('package.rpm') do |rpm|
|
|
124
|
+
rpm.each_payload_entry do |entry|
|
|
125
|
+
puts "Entry: #{entry.path}"
|
|
126
|
+
puts "Content: #{entry.read[0, 100]}..."
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
----
|
|
130
|
+
|
|
131
|
+
== Compression Types
|
|
132
|
+
|
|
133
|
+
RPM supports four payload compression formats:
|
|
134
|
+
|
|
135
|
+
[cols="2,1,1"]
|
|
136
|
+
|===
|
|
137
|
+
|Format |Extension |Compression Ratio
|
|
138
|
+
|
|
139
|
+
|gzip
|
|
140
|
+
|.gz
|
|
141
|
+
|Good
|
|
142
|
+
|
|
143
|
+
|bzip2
|
|
144
|
+
|.bz2
|
|
145
|
+
|Better
|
|
146
|
+
|
|
147
|
+
|xz
|
|
148
|
+
|.xz
|
|
149
|
+
|Best
|
|
150
|
+
|
|
151
|
+
|zstd
|
|
152
|
+
|.zst
|
|
153
|
+
|Very good (modern, fast decompression)
|
|
154
|
+
|===
|
|
155
|
+
|
|
156
|
+
[source,ruby]
|
|
157
|
+
----
|
|
158
|
+
# Check compression type
|
|
159
|
+
rpm = Omnizip::Rpm.open('package.rpm')
|
|
160
|
+
puts "Compression: #{rpm.compression_type}"
|
|
161
|
+
|
|
162
|
+
# Handle different compression types
|
|
163
|
+
case rpm.compression_type
|
|
164
|
+
when :gzip
|
|
165
|
+
puts "Using gzip decompression"
|
|
166
|
+
when :bzip2
|
|
167
|
+
puts "Using bzip2 decompression"
|
|
168
|
+
when :xz
|
|
169
|
+
puts "Using xz decompression"
|
|
170
|
+
when :zstd
|
|
171
|
+
puts "Using zstd decompression"
|
|
172
|
+
end
|
|
173
|
+
----
|
|
174
|
+
|
|
175
|
+
== Scriptlets
|
|
176
|
+
|
|
177
|
+
RPM packages can contain pre/post install scripts:
|
|
178
|
+
|
|
179
|
+
[source,ruby]
|
|
180
|
+
----
|
|
181
|
+
# Access scriptlets
|
|
182
|
+
rpm = Omnizip::Rpm.open('package.rpm')
|
|
183
|
+
|
|
184
|
+
puts "Pre-install script: #{rpm.pre_install_script}"
|
|
185
|
+
puts "Post-install script: #{rpm.post_install_script}"
|
|
186
|
+
puts "Pre-uninstall script: #{rpm.pre_uninstall_script}"
|
|
187
|
+
puts "Post-uninstall script: #{rpm.post_uninstall_script}"
|
|
188
|
+
----
|
|
189
|
+
|
|
190
|
+
== Dependencies
|
|
191
|
+
|
|
192
|
+
RPM packages declare dependencies:
|
|
193
|
+
|
|
194
|
+
[source,ruby]
|
|
195
|
+
----
|
|
196
|
+
# Access dependencies
|
|
197
|
+
rpm = Omnizip::Rpm.open('package.rpm')
|
|
198
|
+
|
|
199
|
+
# Required packages
|
|
200
|
+
puts "Requires:"
|
|
201
|
+
rpm.requires.each do |dep|
|
|
202
|
+
puts " #{dep.name} #{dep.version}"
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
# Provided capabilities
|
|
206
|
+
puts "Provides:"
|
|
207
|
+
rpm.provides.each do |cap|
|
|
208
|
+
puts " #{cap.name}"
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
# Required capabilities
|
|
212
|
+
puts "Requires:"
|
|
213
|
+
rpm.requires.each do |req|
|
|
214
|
+
puts " #{req.name} #{req.version}"
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
# Conflicts
|
|
218
|
+
puts "Conflicts:"
|
|
219
|
+
rpm.conflicts.each do |conf|
|
|
220
|
+
puts " #{conf.name} #{conf.version}"
|
|
221
|
+
end
|
|
222
|
+
----
|
|
223
|
+
|
|
224
|
+
== File Information
|
|
225
|
+
|
|
226
|
+
RPM tracks detailed file information:
|
|
227
|
+
|
|
228
|
+
[source,ruby]
|
|
229
|
+
----
|
|
230
|
+
# Access file entries
|
|
231
|
+
Omnizip::Rpm.open('package.rpm') do |rpm|
|
|
232
|
+
rpm.each_file do |file|
|
|
233
|
+
puts "Path: #{file.path}"
|
|
234
|
+
puts "Size: #{file.size}"
|
|
235
|
+
puts "Mode: #{file.mode.to_s(8)}"
|
|
236
|
+
puts "Owner: #{file.owner}"
|
|
237
|
+
puts "Group: #{file.group}"
|
|
238
|
+
puts "MD5: #{file.md5}" if file.md5
|
|
239
|
+
puts "Is config: #{file.config?}"
|
|
240
|
+
puts "Is doc: #{file.doc?}"
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
----
|
|
244
|
+
|
|
245
|
+
== See Also
|
|
246
|
+
|
|
247
|
+
* link:tar-format.html[TAR Format] - Related archive format
|
|
248
|
+
* link:xz-format.html[XZ Format] - XZ compression used in some RPMs
|
|
249
|
+
* link:cpio-format.html[CPIO Format] - Underlying payload format
|
data/docs/index.adoc
CHANGED
|
@@ -66,6 +66,8 @@ Follow this learning path:
|
|
|
66
66
|
* link:guides/archive-formats/tar-format.html[TAR archives]
|
|
67
67
|
* link:guides/archive-formats/gzip-format.html[GZIP files]
|
|
68
68
|
* link:guides/archive-formats/xz-format.html[XZ files]
|
|
69
|
+
* link:guides/archive-formats/rpm-format.html[RPM packages]
|
|
70
|
+
* link:guides/archive-formats/ole-format.html[OLE compound documents]
|
|
69
71
|
|
|
70
72
|
**Advanced Features**
|
|
71
73
|
|
|
@@ -113,7 +115,7 @@ Omnizip supports 6 major compression algorithms:
|
|
|
113
115
|
|
|
114
116
|
=== Supported Archive Formats
|
|
115
117
|
|
|
116
|
-
Omnizip supports
|
|
118
|
+
Omnizip supports 12 archive formats:
|
|
117
119
|
|
|
118
120
|
[cols="1,3,2"]
|
|
119
121
|
|===
|
|
@@ -154,13 +156,21 @@ Omnizip supports 10 archive formats:
|
|
|
154
156
|
|BZIP2
|
|
155
157
|
|BZIP2 compressed files
|
|
156
158
|
|Text file compression
|
|
159
|
+
|
|
160
|
+
|RPM
|
|
161
|
+
|RPM package archives with CPIO payload
|
|
162
|
+
|Linux package management
|
|
163
|
+
|
|
164
|
+
|OLE
|
|
165
|
+
|OLE compound documents (MSI, DOC, XLS, PPT)
|
|
166
|
+
|Windows compound files, installers
|
|
157
167
|
|===
|
|
158
168
|
|
|
159
169
|
=== Key Features
|
|
160
170
|
|
|
161
171
|
* **Pure Ruby** - No native dependencies, works on any Ruby platform
|
|
162
172
|
* **Registry-Based** - Extensible plugin architecture for algorithms and formats
|
|
163
|
-
* **Multiple Formats** - Support for
|
|
173
|
+
* **Multiple Formats** - Support for 12+ archive formats
|
|
164
174
|
* **Compression Profiles** - Smart algorithm selection based on file type
|
|
165
175
|
* **Solid Archives** - Shared dictionary for better compression ratios
|
|
166
176
|
* **Multi-Volume** - Split archives across multiple files
|
|
@@ -74,7 +74,8 @@ module Omnizip
|
|
|
74
74
|
#
|
|
75
75
|
# @return [void]
|
|
76
76
|
def reset_models
|
|
77
|
-
if (
|
|
77
|
+
if ENV.fetch("DEBUG_RESET_MODELS",
|
|
78
|
+
nil) && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
|
|
78
79
|
puts " [DistanceCoder.reset_models] Resetting #{@slot_encoders.size} len_states, each with #{@slot_encoders[0]&.size || '?'} models"
|
|
79
80
|
end
|
|
80
81
|
@slot_encoders.each do |len_state_models|
|
|
@@ -82,7 +83,8 @@ module Omnizip
|
|
|
82
83
|
end
|
|
83
84
|
@pos_encoders.each(&:reset)
|
|
84
85
|
@align_encoder.each(&:reset)
|
|
85
|
-
if (
|
|
86
|
+
if ENV.fetch("DEBUG_RESET_MODELS",
|
|
87
|
+
nil) && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
|
|
86
88
|
puts " [DistanceCoder.reset_models] Done resetting"
|
|
87
89
|
end
|
|
88
90
|
end
|
|
@@ -153,9 +155,11 @@ module Omnizip
|
|
|
153
155
|
)
|
|
154
156
|
|
|
155
157
|
# DEBUG: Trace all when LZMA_DEBUG_DISTANCE is set
|
|
156
|
-
trace_all = ENV
|
|
158
|
+
trace_all = ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
|
|
157
159
|
|
|
158
|
-
if (trace_325 || trace_large || trace_all) && (
|
|
160
|
+
if (trace_325 || trace_large || trace_all) && ENV.fetch(
|
|
161
|
+
"LZMA_DEBUG_DISTANCE", nil
|
|
162
|
+
)
|
|
159
163
|
puts " [DistanceCoder.decode ##{$distance_decode_count}] START - len_state=#{len_state}"
|
|
160
164
|
puts " BEFORE: range=#{range_decoder.range.inspect}, code=#{range_decoder.code.inspect}"
|
|
161
165
|
end
|
|
@@ -163,7 +167,9 @@ module Omnizip
|
|
|
163
167
|
slot = decode_tree(range_decoder, @slot_encoders[len_state],
|
|
164
168
|
NUM_DIST_SLOT_BITS)
|
|
165
169
|
|
|
166
|
-
if (debug_this || trace_large || trace_all) && (
|
|
170
|
+
if (debug_this || trace_large || trace_all) && ENV.fetch(
|
|
171
|
+
"LZMA_DEBUG_DISTANCE", nil
|
|
172
|
+
)
|
|
167
173
|
puts " [DistanceCoder.decode ##{$distance_decode_count}] len_state=#{len_state}, slot=#{slot}"
|
|
168
174
|
puts " @slot_encoders[#{len_state}] object_id=#{@slot_encoders[len_state].object_id}"
|
|
169
175
|
end
|
|
@@ -172,7 +178,7 @@ module Omnizip
|
|
|
172
178
|
if slot < START_POS_MODEL_INDEX
|
|
173
179
|
# Slots 0-3: No extra bits
|
|
174
180
|
$distance_decode_count += 1
|
|
175
|
-
if debug_this && (
|
|
181
|
+
if debug_this && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
|
|
176
182
|
puts " -> distance=#{slot}"
|
|
177
183
|
end
|
|
178
184
|
slot
|
|
@@ -187,7 +193,7 @@ module Omnizip
|
|
|
187
193
|
base - slot - 1,
|
|
188
194
|
footer_bits)
|
|
189
195
|
$distance_decode_count += 1
|
|
190
|
-
if debug_this && (
|
|
196
|
+
if debug_this && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
|
|
191
197
|
puts " -> distance=#{result} (slot #{slot})"
|
|
192
198
|
end
|
|
193
199
|
else
|
|
@@ -213,14 +219,16 @@ module Omnizip
|
|
|
213
219
|
|
|
214
220
|
# Use decode_direct_bits_with_base to match XZ Utils rc_direct
|
|
215
221
|
# rc_direct builds on the base value iteratively
|
|
216
|
-
result = range_decoder.decode_direct_bits_with_base(
|
|
222
|
+
result = range_decoder.decode_direct_bits_with_base(
|
|
223
|
+
num_direct_bits, result
|
|
224
|
+
)
|
|
217
225
|
|
|
218
226
|
# Decode low 4 bits using aligned encoder (reverse tree)
|
|
219
227
|
low_bits = decode_reverse_tree(range_decoder,
|
|
220
228
|
@align_encoder,
|
|
221
229
|
0,
|
|
222
230
|
DIST_ALIGN_BITS)
|
|
223
|
-
if trace_326 && (
|
|
231
|
+
if trace_326 && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
|
|
224
232
|
puts " TRACE_326: low_bits=#{low_bits}"
|
|
225
233
|
end
|
|
226
234
|
|
|
@@ -228,7 +236,9 @@ module Omnizip
|
|
|
228
236
|
# NOTE: slot value is NOT added (XZ Utils pattern - line 513 adds symbol for EOPM check only)
|
|
229
237
|
result = (result << DIST_ALIGN_BITS) + low_bits
|
|
230
238
|
$distance_decode_count += 1
|
|
231
|
-
if (debug_this || trace_large) && (
|
|
239
|
+
if (debug_this || trace_large) && ENV.fetch(
|
|
240
|
+
"LZMA_DEBUG_DISTANCE", nil
|
|
241
|
+
)
|
|
232
242
|
puts " -> slot=#{slot}, result_after_direct=#{result >> DIST_ALIGN_BITS}, low_bits=#{low_bits}, distance=#{result}"
|
|
233
243
|
end
|
|
234
244
|
if result > 100000
|
|
@@ -314,10 +324,10 @@ module Omnizip
|
|
|
314
324
|
# @return [void]
|
|
315
325
|
def encode_tree(range_encoder, models, symbol, num_bits)
|
|
316
326
|
m = 1
|
|
317
|
-
trace_all = ENV
|
|
327
|
+
trace_all = ENV.fetch("TRACE_ALL_SLOT_ENCODE", nil)
|
|
318
328
|
iteration = 0
|
|
319
329
|
|
|
320
|
-
if trace_all && (
|
|
330
|
+
if trace_all && ENV.fetch("LZMA_DEBUG_ENCODE", nil)
|
|
321
331
|
puts " [encode_tree START] RECEIVED symbol=#{symbol}, num_bits=#{num_bits}"
|
|
322
332
|
puts " BEFORE: range=#{range_encoder.range}, low=#{range_encoder.low}"
|
|
323
333
|
end
|
|
@@ -325,7 +335,7 @@ module Omnizip
|
|
|
325
335
|
(num_bits - 1).downto(0) do |i|
|
|
326
336
|
iteration += 1
|
|
327
337
|
bit = (symbol >> i) & 1
|
|
328
|
-
if trace_all && (
|
|
338
|
+
if trace_all && ENV.fetch("LZMA_DEBUG_ENCODE", nil)
|
|
329
339
|
model_idx = m
|
|
330
340
|
puts " [#{iteration}/#{num_bits}] i=#{i}, bit=#{bit}, m=#{m}, model_idx=#{model_idx}, prob=#{models[m].probability}"
|
|
331
341
|
end
|
|
@@ -333,7 +343,7 @@ module Omnizip
|
|
|
333
343
|
m = (m << 1) | bit
|
|
334
344
|
end
|
|
335
345
|
|
|
336
|
-
if trace_all && (
|
|
346
|
+
if trace_all && ENV.fetch("LZMA_DEBUG_ENCODE", nil)
|
|
337
347
|
puts " AFTER: range=#{range_encoder.range}, low=#{range_encoder.low}"
|
|
338
348
|
puts " [encode_tree END] ENCODED symbol=#{symbol}"
|
|
339
349
|
end
|
|
@@ -350,10 +360,10 @@ module Omnizip
|
|
|
350
360
|
symbol = 0
|
|
351
361
|
trace_this = (num_bits == 6 && ENV.fetch("TRACE_SLOT_DECODE",
|
|
352
362
|
nil)) || ($distance_decode_count == 28)
|
|
353
|
-
trace_all = ENV
|
|
363
|
+
trace_all = ENV.fetch("TRACE_ALL_SLOT_DECODE", nil)
|
|
354
364
|
iteration = 0
|
|
355
365
|
|
|
356
|
-
if (trace_this || trace_all) && (
|
|
366
|
+
if (trace_this || trace_all) && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
|
|
357
367
|
puts " [decode_tree START] num_bits=#{num_bits}, range=#{range_decoder.range}, code=#{range_decoder.code}"
|
|
358
368
|
puts " models array object_id=#{models.object_id}"
|
|
359
369
|
end
|
|
@@ -364,11 +374,12 @@ module Omnizip
|
|
|
364
374
|
bit = range_decoder.decode_bit(model)
|
|
365
375
|
m = (m << 1) | bit
|
|
366
376
|
symbol |= (bit << i)
|
|
367
|
-
if (trace_this || trace_all) && (
|
|
377
|
+
if (trace_this || trace_all) && ENV.fetch("LZMA_DEBUG_DISTANCE",
|
|
378
|
+
nil)
|
|
368
379
|
puts " [#{iteration}/#{num_bits}] i=#{i}, bit=#{bit}, m=#{m}, model.object_id=#{model.object_id}, prob=#{model.probability}, symbol=#{symbol}"
|
|
369
380
|
end
|
|
370
381
|
end
|
|
371
|
-
if (trace_this || trace_all) && (
|
|
382
|
+
if (trace_this || trace_all) && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
|
|
372
383
|
puts " [decode_tree END] symbol=#{symbol}"
|
|
373
384
|
end
|
|
374
385
|
symbol
|
|
@@ -76,7 +76,8 @@ module Omnizip
|
|
|
76
76
|
XzEncoderAdapter.new(output, options)
|
|
77
77
|
else
|
|
78
78
|
# Use SdkEncoder (7-Zip LZMA SDK compatible) - DEFAULT
|
|
79
|
-
Implementations::SevenZip::LZMA::Encoder.new(output,
|
|
79
|
+
Implementations::SevenZip::LZMA::Encoder.new(output,
|
|
80
|
+
options)
|
|
80
81
|
end
|
|
81
82
|
end
|
|
82
83
|
|
|
@@ -70,7 +70,8 @@ module Omnizip
|
|
|
70
70
|
# @param pos_state [Integer] Position state for tree selection
|
|
71
71
|
# @return [void]
|
|
72
72
|
def encode(range_encoder, length, pos_state)
|
|
73
|
-
trace_encode = ENV.fetch("LZMA_DEBUG_ENCODE",
|
|
73
|
+
trace_encode = ENV.fetch("LZMA_DEBUG_ENCODE",
|
|
74
|
+
nil) && ENV.fetch("TRACE_LENGTH_CODER", nil)
|
|
74
75
|
|
|
75
76
|
if trace_encode
|
|
76
77
|
puts " [LengthCoder.encode] START: length=#{length}, pos_state=#{pos_state}"
|
|
@@ -138,7 +139,8 @@ module Omnizip
|
|
|
138
139
|
# @param pos_state [Integer] Position state for tree selection
|
|
139
140
|
# @return [Integer] Decoded length value (before adding MATCH_LEN_MIN)
|
|
140
141
|
def decode(range_decoder, pos_state)
|
|
141
|
-
trace_decode = ENV.fetch("LZMA_DEBUG_DISTANCE",
|
|
142
|
+
trace_decode = ENV.fetch("LZMA_DEBUG_DISTANCE",
|
|
143
|
+
nil) && ENV.fetch("TRACE_LENGTH_CODER", nil)
|
|
142
144
|
|
|
143
145
|
if trace_decode
|
|
144
146
|
caller_loc = caller_locations(2, 1).first
|
|
@@ -160,7 +162,8 @@ module Omnizip
|
|
|
160
162
|
if trace_decode
|
|
161
163
|
puts " Using LOW tree"
|
|
162
164
|
end
|
|
163
|
-
result = decode_tree(range_decoder, @low[pos_state],
|
|
165
|
+
result = decode_tree(range_decoder, @low[pos_state],
|
|
166
|
+
NUM_LEN_LOW_BITS)
|
|
164
167
|
elsif range_decoder.decode_bit(@choice2).zero?
|
|
165
168
|
# Mid tree
|
|
166
169
|
if trace_decode
|
|
@@ -281,7 +281,8 @@ module Omnizip
|
|
|
281
281
|
end
|
|
282
282
|
|
|
283
283
|
result = symbol - 0x100
|
|
284
|
-
if trace_233 || (ENV.fetch("TRACE_MATCHED_DECODE",
|
|
284
|
+
if trace_233 || (ENV.fetch("TRACE_MATCHED_DECODE",
|
|
285
|
+
nil) && lit_state == 96)
|
|
285
286
|
puts "\n FINAL RESULT: 0x#{result.to_s(16).upcase} ('#{result.chr}')"
|
|
286
287
|
if trace_233
|
|
287
288
|
puts " Result bits: #{result_bits.join}"
|
|
@@ -151,7 +151,7 @@ module Omnizip
|
|
|
151
151
|
@input.read(footer_size)
|
|
152
152
|
@member_size += footer_size
|
|
153
153
|
else
|
|
154
|
-
data_to_crc = decoded_data || +
|
|
154
|
+
data_to_crc = decoded_data || +""
|
|
155
155
|
calculated_crc = Omnizip::Checksums::Crc32.calculate(data_to_crc)
|
|
156
156
|
@uncompressed_size = data_to_crc.bytesize
|
|
157
157
|
|
|
@@ -211,31 +211,45 @@ module Omnizip
|
|
|
211
211
|
# Step 1: Verify magic bytes (SEQ_ID_STRING)
|
|
212
212
|
# Reference: lzip_decoder.c:104-153
|
|
213
213
|
magic_bytes = @input.read(4)
|
|
214
|
-
|
|
214
|
+
if magic_bytes.nil? || magic_bytes.bytesize < 4
|
|
215
|
+
raise Omnizip::DecompressionError,
|
|
216
|
+
"Incomplete .lz header: missing magic bytes"
|
|
217
|
+
end
|
|
215
218
|
|
|
216
219
|
4.times do |i|
|
|
217
220
|
if magic_bytes.getbyte(i) != MAGIC[i]
|
|
218
|
-
raise Omnizip::DecompressionError, "Invalid .lz header: magic bytes don't match LZIP (expected #{MAGIC.map
|
|
221
|
+
raise Omnizip::DecompressionError, "Invalid .lz header: magic bytes don't match LZIP (expected #{MAGIC.map do |b|
|
|
222
|
+
"0x#{b.to_s(16).upcase}"
|
|
223
|
+
end.join(' ')}, got #{magic_bytes.bytes.map do |b|
|
|
224
|
+
"0x#{b.to_s(16).upcase}"
|
|
225
|
+
end.join(' ')})"
|
|
219
226
|
end
|
|
220
227
|
end
|
|
221
228
|
|
|
222
229
|
# Step 2: Read version byte (SEQ_VERSION)
|
|
223
230
|
# Reference: lzip_decoder.c:156-174
|
|
224
231
|
version_byte = @input.getbyte
|
|
225
|
-
|
|
232
|
+
if version_byte.nil?
|
|
233
|
+
raise Omnizip::DecompressionError,
|
|
234
|
+
"Incomplete .lz header: missing version byte"
|
|
235
|
+
end
|
|
226
236
|
|
|
227
237
|
@version = version_byte
|
|
228
238
|
|
|
229
239
|
# We support version 0 and unextended version 1
|
|
230
240
|
# Reference: lzip_decoder.c:163-164
|
|
231
241
|
if @version > 1
|
|
232
|
-
raise Omnizip::UnsupportedFormatError,
|
|
242
|
+
raise Omnizip::UnsupportedFormatError,
|
|
243
|
+
"Unsupported .lz version: #{@version} (only 0 and 1 are supported)"
|
|
233
244
|
end
|
|
234
245
|
|
|
235
246
|
# Step 3: Parse dictionary size (SEQ_DICT_SIZE)
|
|
236
247
|
# Reference: lzip_decoder.c:177-222
|
|
237
248
|
dict_size_byte = @input.getbyte
|
|
238
|
-
|
|
249
|
+
if dict_size_byte.nil?
|
|
250
|
+
raise Omnizip::DecompressionError,
|
|
251
|
+
"Incomplete .lz header: missing dictionary size byte"
|
|
252
|
+
end
|
|
239
253
|
|
|
240
254
|
# Decode dictionary size from the encoded byte
|
|
241
255
|
# The five lowest bits are for the base-2 logarithm of the dictionary size
|
|
@@ -247,7 +261,8 @@ module Omnizip
|
|
|
247
261
|
# Validate range: [4 KiB, 512 MiB]
|
|
248
262
|
# Reference: lzip_decoder.c:198-199
|
|
249
263
|
if b2log < 12 || b2log > 29 || (b2log == 12 && fracnum.positive?)
|
|
250
|
-
raise Omnizip::DecompressionError,
|
|
264
|
+
raise Omnizip::DecompressionError,
|
|
265
|
+
"Invalid .lz header: dictionary size byte 0x#{dict_size_byte.to_s(16).upcase} is out of valid range"
|
|
251
266
|
end
|
|
252
267
|
|
|
253
268
|
# Calculate: 2^[b2log] - [fracnum] * 2^([b2log] - 4)
|
|
@@ -255,8 +270,14 @@ module Omnizip
|
|
|
255
270
|
@dict_size = (1 << b2log) - (fracnum << (b2log - 4))
|
|
256
271
|
|
|
257
272
|
# Sanity checks
|
|
258
|
-
|
|
259
|
-
|
|
273
|
+
if @dict_size < MIN_DICT_SIZE
|
|
274
|
+
raise Omnizip::DecompressionError,
|
|
275
|
+
"Dictionary size calculation error: too small"
|
|
276
|
+
end
|
|
277
|
+
if @dict_size > MAX_DICT_SIZE
|
|
278
|
+
raise Omnizip::DecompressionError,
|
|
279
|
+
"Dictionary size calculation error: too large"
|
|
280
|
+
end
|
|
260
281
|
end
|
|
261
282
|
|
|
262
283
|
# Verify .lz format footer
|
|
@@ -273,7 +294,10 @@ module Omnizip
|
|
|
273
294
|
def verify_footer(calculated_crc)
|
|
274
295
|
footer_size = @version.zero? ? LZIP_V0_FOOTER_SIZE : LZIP_V1_FOOTER_SIZE
|
|
275
296
|
footer = @input.read(footer_size)
|
|
276
|
-
|
|
297
|
+
if footer.nil? || footer.bytesize < footer_size
|
|
298
|
+
raise Omnizip::DecompressionError,
|
|
299
|
+
"Incomplete .lz footer: expected #{footer_size} bytes, got #{footer&.bytesize || 0}"
|
|
300
|
+
end
|
|
277
301
|
|
|
278
302
|
# Update member_size to include the footer
|
|
279
303
|
@member_size += footer_size
|
|
@@ -284,7 +308,8 @@ module Omnizip
|
|
|
284
308
|
|
|
285
309
|
# Verify CRC32
|
|
286
310
|
if calculated_crc != stored_crc
|
|
287
|
-
raise Omnizip::ChecksumError,
|
|
311
|
+
raise Omnizip::ChecksumError,
|
|
312
|
+
"CRC32 mismatch: calculated 0x#{calculated_crc.to_s(16).upcase}, stored 0x#{stored_crc.to_s(16).upcase}"
|
|
288
313
|
end
|
|
289
314
|
|
|
290
315
|
# Parse and verify uncompressed size (little-endian)
|
|
@@ -294,7 +319,8 @@ module Omnizip
|
|
|
294
319
|
(footer.getbyte(10) << 48) | (footer.getbyte(11) << 56)
|
|
295
320
|
|
|
296
321
|
if @uncompressed_size != stored_uncompressed_size
|
|
297
|
-
raise Omnizip::ChecksumError,
|
|
322
|
+
raise Omnizip::ChecksumError,
|
|
323
|
+
"Uncompressed size mismatch: decoded #{@uncompressed_size}, stored #{stored_uncompressed_size}"
|
|
298
324
|
end
|
|
299
325
|
|
|
300
326
|
# For version 1, verify member size
|
|
@@ -305,7 +331,8 @@ module Omnizip
|
|
|
305
331
|
(footer.getbyte(18) << 48) | (footer.getbyte(19) << 56)
|
|
306
332
|
|
|
307
333
|
if @member_size != stored_member_size
|
|
308
|
-
raise Omnizip::ChecksumError,
|
|
334
|
+
raise Omnizip::ChecksumError,
|
|
335
|
+
"Member size mismatch: decoded #{@member_size}, stored #{stored_member_size}"
|
|
309
336
|
end
|
|
310
337
|
end
|
|
311
338
|
end
|
|
@@ -84,7 +84,8 @@ module Omnizip
|
|
|
84
84
|
prob_before = model.probability if trace_model_updates
|
|
85
85
|
|
|
86
86
|
# DEBUG: Trace is_rep bit decoding
|
|
87
|
-
trace_is_rep = ENV.fetch("TRACE_IS_REP_BITS",
|
|
87
|
+
trace_is_rep = ENV.fetch("TRACE_IS_REP_BITS",
|
|
88
|
+
nil) && (bound > 1_000_000)
|
|
88
89
|
|
|
89
90
|
if trace_is_rep
|
|
90
91
|
puts " [RangeDecoder.decode_bit] BEFORE: range=#{@range}, code=#{@code}, bound=#{bound}, prob=#{model.probability}"
|
|
@@ -118,7 +119,8 @@ module Omnizip
|
|
|
118
119
|
end
|
|
119
120
|
|
|
120
121
|
# DEBUG: Trace decode_bit for specific problematic state
|
|
121
|
-
if ENV.fetch("TRACE_SPECIFIC_DECODE",
|
|
122
|
+
if ENV.fetch("TRACE_SPECIFIC_DECODE",
|
|
123
|
+
nil) && @range == 0x40000000 && @code == 0x21407d82
|
|
122
124
|
puts " === CRITICAL DECODE_BIT (MATCHED LITERAL) ==="
|
|
123
125
|
puts " BEFORE: range=0x#{@range.to_s(16)} (#{@range})"
|
|
124
126
|
puts " BEFORE: code=0x#{@code.to_s(16)} (#{@code})"
|
|
@@ -219,6 +221,38 @@ module Omnizip
|
|
|
219
221
|
result
|
|
220
222
|
end
|
|
221
223
|
|
|
224
|
+
# Decode a cumulative frequency value
|
|
225
|
+
#
|
|
226
|
+
# This is used by PPMd for decoding symbols based on their
|
|
227
|
+
# frequency distribution. Returns the cumulative frequency
|
|
228
|
+
# that can be mapped back to a symbol.
|
|
229
|
+
#
|
|
230
|
+
# @param total_freq [Integer] Total frequency of all symbols in context
|
|
231
|
+
# @return [Integer] The cumulative frequency value
|
|
232
|
+
def decode_freq(total_freq)
|
|
233
|
+
normalize
|
|
234
|
+
range_freq = @range / total_freq
|
|
235
|
+
@code / range_freq
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
# Normalize after decoding a symbol with frequency
|
|
239
|
+
#
|
|
240
|
+
# After using decode_freq to get the cumulative frequency,
|
|
241
|
+
# call this to update the range decoder state.
|
|
242
|
+
#
|
|
243
|
+
# @param cum_freq [Integer] Cumulative frequency of decoded symbol
|
|
244
|
+
# @param freq [Integer] Frequency of decoded symbol
|
|
245
|
+
# @param total_freq [Integer] Total frequency of all symbols
|
|
246
|
+
# @return [void]
|
|
247
|
+
def normalize_freq(cum_freq, freq, total_freq)
|
|
248
|
+
range_freq = @range / total_freq
|
|
249
|
+
low_bound = range_freq * cum_freq
|
|
250
|
+
high_bound = range_freq * (cum_freq + freq)
|
|
251
|
+
|
|
252
|
+
@code -= low_bound
|
|
253
|
+
@range = (high_bound - low_bound) & 0xFFFFFFFF
|
|
254
|
+
end
|
|
255
|
+
|
|
222
256
|
# Decode bits directly using a base value (XZ Utils rc_direct pattern)
|
|
223
257
|
#
|
|
224
258
|
# This method implements the XZ Utils rc_direct macro which is used
|