omnizip 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +243 -368
  3. data/README.adoc +101 -5
  4. data/docs/guides/archive-formats/index.adoc +31 -1
  5. data/docs/guides/archive-formats/ole-format.adoc +316 -0
  6. data/docs/guides/archive-formats/rpm-format.adoc +249 -0
  7. data/docs/index.adoc +12 -2
  8. data/lib/omnizip/algorithms/lzma/distance_coder.rb +29 -18
  9. data/lib/omnizip/algorithms/lzma/encoder.rb +2 -1
  10. data/lib/omnizip/algorithms/lzma/length_coder.rb +6 -3
  11. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +2 -1
  12. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +40 -13
  13. data/lib/omnizip/algorithms/lzma/range_decoder.rb +36 -2
  14. data/lib/omnizip/algorithms/lzma/range_encoder.rb +19 -0
  15. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +2 -1
  16. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +148 -112
  17. data/lib/omnizip/algorithms/lzma.rb +20 -5
  18. data/lib/omnizip/algorithms/ppmd7/decoder.rb +25 -21
  19. data/lib/omnizip/algorithms/ppmd7/encoder.rb +4 -11
  20. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +2 -1
  21. data/lib/omnizip/algorithms/xz_lzma2.rb +2 -1
  22. data/lib/omnizip/algorithms/zstandard/constants.rb +125 -9
  23. data/lib/omnizip/algorithms/zstandard/decoder.rb +202 -17
  24. data/lib/omnizip/algorithms/zstandard/encoder.rb +197 -17
  25. data/lib/omnizip/algorithms/zstandard/frame/block.rb +128 -0
  26. data/lib/omnizip/algorithms/zstandard/frame/header.rb +224 -0
  27. data/lib/omnizip/algorithms/zstandard/fse/bitstream.rb +186 -0
  28. data/lib/omnizip/algorithms/zstandard/fse/encoder.rb +325 -0
  29. data/lib/omnizip/algorithms/zstandard/fse/table.rb +269 -0
  30. data/lib/omnizip/algorithms/zstandard/huffman.rb +272 -0
  31. data/lib/omnizip/algorithms/zstandard/huffman_encoder.rb +339 -0
  32. data/lib/omnizip/algorithms/zstandard/literals.rb +178 -0
  33. data/lib/omnizip/algorithms/zstandard/literals_encoder.rb +251 -0
  34. data/lib/omnizip/algorithms/zstandard/sequences.rb +346 -0
  35. data/lib/omnizip/buffer/memory_extractor.rb +3 -3
  36. data/lib/omnizip/buffer.rb +2 -2
  37. data/lib/omnizip/filters/delta.rb +2 -1
  38. data/lib/omnizip/filters/registry.rb +6 -6
  39. data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
  40. data/lib/omnizip/formats/lzip.rb +2 -1
  41. data/lib/omnizip/formats/lzma_alone.rb +2 -1
  42. data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
  43. data/lib/omnizip/formats/ole/constants.rb +61 -0
  44. data/lib/omnizip/formats/ole/dirent.rb +380 -0
  45. data/lib/omnizip/formats/ole/header.rb +198 -0
  46. data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
  47. data/lib/omnizip/formats/ole/storage.rb +305 -0
  48. data/lib/omnizip/formats/ole/types/variant.rb +328 -0
  49. data/lib/omnizip/formats/ole.rb +145 -0
  50. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +92 -49
  51. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +13 -20
  52. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +6 -2
  53. data/lib/omnizip/formats/rar3/reader.rb +6 -2
  54. data/lib/omnizip/formats/rar5/reader.rb +4 -1
  55. data/lib/omnizip/formats/rpm/constants.rb +58 -0
  56. data/lib/omnizip/formats/rpm/entry.rb +102 -0
  57. data/lib/omnizip/formats/rpm/header.rb +113 -0
  58. data/lib/omnizip/formats/rpm/lead.rb +122 -0
  59. data/lib/omnizip/formats/rpm/tag.rb +230 -0
  60. data/lib/omnizip/formats/rpm.rb +434 -0
  61. data/lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb +239 -0
  62. data/lib/omnizip/formats/seven_zip/coder_chain.rb +32 -8
  63. data/lib/omnizip/formats/seven_zip/constants.rb +1 -1
  64. data/lib/omnizip/formats/seven_zip/reader.rb +84 -8
  65. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +2 -1
  66. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +6 -0
  67. data/lib/omnizip/formats/seven_zip/writer.rb +21 -9
  68. data/lib/omnizip/formats/seven_zip.rb +10 -0
  69. data/lib/omnizip/formats/xar/entry.rb +18 -5
  70. data/lib/omnizip/formats/xar/header.rb +34 -6
  71. data/lib/omnizip/formats/xar/reader.rb +43 -10
  72. data/lib/omnizip/formats/xar/toc.rb +34 -21
  73. data/lib/omnizip/formats/xar/writer.rb +15 -5
  74. data/lib/omnizip/formats/xz_impl/block_decoder.rb +45 -33
  75. data/lib/omnizip/formats/xz_impl/block_encoder.rb +2 -1
  76. data/lib/omnizip/formats/xz_impl/index_decoder.rb +3 -1
  77. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +2 -1
  78. data/lib/omnizip/formats/zip/end_of_central_directory.rb +4 -3
  79. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +14 -6
  80. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +2 -1
  81. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +28 -13
  82. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +13 -6
  83. data/lib/omnizip/pipe/stream_compressor.rb +1 -1
  84. data/lib/omnizip/version.rb +1 -1
  85. data/readme-docs/compression-algorithms.adoc +6 -2
  86. metadata +30 -2
@@ -0,0 +1,249 @@
1
+ ---
2
+ title: RPM Format
3
+ nav_order: 9
4
+ parent: Archive Formats
5
+ grand_parent: Guides
6
+ ---
7
+
8
+ [[rpm-format]]
9
+ == Purpose
10
+
11
+ RPM (Red Hat Package Manager) is the standard package format for Red Hat-based Linux distributions (RHEL, CentOS, Fedora). An RPM package bundles software binaries, metadata, and scripts into a single archive whose CPIO payload is compressed using gzip, bzip2, xz, or zstd.
12
+
13
+ == Key Characteristics
14
+
15
+ [cols="1,3"]
16
+ |===
17
+ |Property |Value
18
+
19
+ |Compression
20
+ |gzip, bzip2, xz, or zstd
21
+
22
+ |Encryption
23
+ |None (payload is not encrypted); authenticity via GPG signature verification
24
+
25
+ |Archive Support
26
+ |Single package with CPIO payload
27
+
28
+ |Large Files
29
+ |Limited by CPIO and compression
30
+
31
+ |Best For
32
+ |Linux software distribution, RPM-based systems
33
+ |===
34
+
35
+ == Basic Usage
36
+
37
+ === Read RPM Package Metadata
38
+
39
+ [source,ruby]
40
+ ----
41
+ # Open and read RPM metadata
42
+ rpm = Omnizip::Rpm.open('package.rpm')
43
+
44
+ puts "Name: #{rpm.name}"
45
+ puts "Version: #{rpm.version}"
46
+ puts "Release: #{rpm.release}"
47
+ puts "Architecture: #{rpm.arch}"
48
+ puts "Summary: #{rpm.summary}"
49
+ puts "Description: #{rpm.description}"
50
+
51
+ # List package contents
52
+ rpm.each_entry do |entry|
53
+ puts "#{entry.path} (#{entry.size} bytes)"
54
+ end
55
+
56
+ rpm.close
57
+ ----
58
+
59
+ === Extract RPM Package
60
+
61
+ [source,ruby]
62
+ ----
63
+ # Extract all files from RPM
64
+ Omnizip::Rpm.extract('package.rpm', '/output/directory/')
65
+
66
+ # Extract specific files
67
+ Omnizip::Rpm.open('package.rpm') do |rpm|
68
+ rpm.extract_files(['usr/bin/app', 'usr/share/man/man1/app.1.gz'], '/output/')
69
+ end
70
+ ----
71
+
72
+ === Check RPM Signature
73
+
74
+ [source,ruby]
75
+ ----
76
+ # Check whether the RPM is GPG signed and show the signing key ID
77
+ rpm = Omnizip::Rpm.open('package.rpm')
78
+ if rpm.signed?
79
+ puts "Package is GPG signed"
80
+ puts "Key ID: #{rpm.signature_key_id}"
81
+ end
82
+ ----
83
+
84
+ == RPM Structure
85
+
86
+ RPM packages consist of:
87
+
88
+ . **Lead** - 96 bytes identifying the package
89
+ . **Signature** - Header with cryptographic signatures
90
+ . **Header** - Metadata (name, version, dependencies, scripts)
91
+ . **Payload** - CPIO archive compressed with gzip/bzip2/xz/zstd
92
+
93
+ [source,ruby]
94
+ ----
95
+ # Internal RPM structure
96
+ rpm = Omnizip::Rpm.open('package.rpm')
97
+
98
+ # Access lead
99
+ puts rpm.lead.inspect
100
+
101
+ # Access signature
102
+ puts rpm.signature.inspect
103
+
104
+ # Access header (metadata)
105
+ puts rpm.header.inspect
106
+
107
+ # Access payload directly (compressed CPIO)
108
+ puts rpm.payload.inspect
109
+ ----
110
+
111
+ == Payload Extraction
112
+
113
+ The RPM payload is a CPIO archive that can be extracted:
114
+
115
+ [source,ruby]
116
+ ----
117
+ # Extract only the payload (files)
118
+ Omnizip::Rpm.open('package.rpm') do |rpm|
119
+ rpm.extract_payload('/output/directory/')
120
+ end
121
+
122
+ # Access payload entries directly
123
+ Omnizip::Rpm.open('package.rpm') do |rpm|
124
+ rpm.each_payload_entry do |entry|
125
+ puts "Entry: #{entry.path}"
126
+ puts "Content: #{entry.read[0, 100]}..."
127
+ end
128
+ end
129
+ ----
130
+
131
+ == Compression Types
132
+
133
+ RPM supports four payload compression formats:
134
+
135
+ [cols="2,1,1"]
136
+ |===
137
+ |Format |Extension |Compression Ratio
138
+
139
+ |gzip
140
+ |.gz
141
+ |Good
142
+
143
+ |bzip2
144
+ |.bz2
145
+ |Better
146
+
147
+ |xz
148
+ |.xz
149
+ |Best
150
+
151
+ |zstd
152
+ |.zst
153
+ |Best (modern)
154
+ |===
155
+
156
+ [source,ruby]
157
+ ----
158
+ # Check compression type
159
+ rpm = Omnizip::Rpm.open('package.rpm')
160
+ puts "Compression: #{rpm.compression_type}"
161
+
162
+ # Handle different compression types
163
+ case rpm.compression_type
164
+ when :gzip
165
+ puts "Using gzip decompression"
166
+ when :bzip2
167
+ puts "Using bzip2 decompression"
168
+ when :xz
169
+ puts "Using xz decompression"
170
+ when :zstd
171
+ puts "Using zstd decompression"
172
+ end
173
+ ----
174
+
175
+ == Scriptlets
176
+
177
+ RPM packages can contain pre/post install scripts:
178
+
179
+ [source,ruby]
180
+ ----
181
+ # Access scriptlets
182
+ rpm = Omnizip::Rpm.open('package.rpm')
183
+
184
+ puts "Pre-install script: #{rpm.pre_install_script}"
185
+ puts "Post-install script: #{rpm.post_install_script}"
186
+ puts "Pre-uninstall script: #{rpm.pre_uninstall_script}"
187
+ puts "Post-uninstall script: #{rpm.post_uninstall_script}"
188
+ ----
189
+
190
+ == Dependencies
191
+
192
+ RPM packages declare dependencies:
193
+
194
+ [source,ruby]
195
+ ----
196
+ # Access dependencies
197
+ rpm = Omnizip::Rpm.open('package.rpm')
198
+
199
+ # Required packages
200
+ puts "Requires:"
201
+ rpm.requires.each do |dep|
202
+ puts " #{dep.name} #{dep.version}"
203
+ end
204
+
205
+ # Provided capabilities
206
+ puts "Provides:"
207
+ rpm.provides.each do |cap|
208
+ puts " #{cap.name}"
209
+ end
210
+
211
+ # Required capabilities
212
+ puts "Requires:"
213
+ rpm.requires.each do |req|
214
+ puts " #{req.name} #{req.version}"
215
+ end
216
+
217
+ # Conflicts
218
+ puts "Conflicts:"
219
+ rpm.conflicts.each do |conf|
220
+ puts " #{conf.name} #{conf.version}"
221
+ end
222
+ ----
223
+
224
+ == File Information
225
+
226
+ RPM tracks detailed file information:
227
+
228
+ [source,ruby]
229
+ ----
230
+ # Access file entries
231
+ Omnizip::Rpm.open('package.rpm') do |rpm|
232
+ rpm.each_file do |file|
233
+ puts "Path: #{file.path}"
234
+ puts "Size: #{file.size}"
235
+ puts "Mode: #{file.mode.to_s(8)}"
236
+ puts "Owner: #{file.owner}"
237
+ puts "Group: #{file.group}"
238
+ puts "MD5: #{file.md5}" if file.md5
239
+ puts "Is config: #{file.config?}"
240
+ puts "Is doc: #{file.doc?}"
241
+ end
242
+ end
243
+ ----
244
+
245
+ == See Also
246
+
247
+ * link:tar-format.html[TAR Format] - Related archive format
248
+ * link:xz-format.html[XZ Format] - XZ compression used in some RPMs
249
+ * link:cpio-format.html[CPIO Format] - Underlying payload format
data/docs/index.adoc CHANGED
@@ -66,6 +66,8 @@ Follow this learning path:
66
66
  * link:guides/archive-formats/tar-format.html[TAR archives]
67
67
  * link:guides/archive-formats/gzip-format.html[GZIP files]
68
68
  * link:guides/archive-formats/xz-format.html[XZ files]
69
+ * link:guides/archive-formats/rpm-format.html[RPM packages]
70
+ * link:guides/archive-formats/ole-format.html[OLE compound documents]
69
71
 
70
72
  **Advanced Features**
71
73
 
@@ -113,7 +115,7 @@ Omnizip supports 6 major compression algorithms:
113
115
 
114
116
  === Supported Archive Formats
115
117
 
116
- Omnizip supports 10 archive formats:
118
+ Omnizip supports 12 archive formats:
117
119
 
118
120
  [cols="1,3,2"]
119
121
  |===
@@ -154,13 +156,21 @@ Omnizip supports 10 archive formats:
154
156
  |BZIP2
155
157
  |BZIP2 compressed files
156
158
  |Text file compression
159
+
160
+ |RPM
161
+ |RPM package archives with CPIO payload
162
+ |Linux package management
163
+
164
+ |OLE
165
+ |OLE compound documents (MSI, DOC, XLS, PPT)
166
+ |Windows compound files, installers
157
167
  |===
158
168
 
159
169
  === Key Features
160
170
 
161
171
  * **Pure Ruby** - No native dependencies, works on any Ruby platform
162
172
  * **Registry-Based** - Extensible plugin architecture for algorithms and formats
163
- * **Multiple Formats** - Support for 10+ archive formats
173
+ * **Multiple Formats** - Support for 12+ archive formats
164
174
  * **Compression Profiles** - Smart algorithm selection based on file type
165
175
  * **Solid Archives** - Shared dictionary for better compression ratios
166
176
  * **Multi-Volume** - Split archives across multiple files
@@ -74,7 +74,8 @@ module Omnizip
74
74
  #
75
75
  # @return [void]
76
76
  def reset_models
77
- if (ENV["DEBUG_RESET_MODELS"]) && (ENV["LZMA_DEBUG_DISTANCE"])
77
+ if ENV.fetch("DEBUG_RESET_MODELS",
78
+ nil) && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
78
79
  puts " [DistanceCoder.reset_models] Resetting #{@slot_encoders.size} len_states, each with #{@slot_encoders[0]&.size || '?'} models"
79
80
  end
80
81
  @slot_encoders.each do |len_state_models|
@@ -82,7 +83,8 @@ module Omnizip
82
83
  end
83
84
  @pos_encoders.each(&:reset)
84
85
  @align_encoder.each(&:reset)
85
- if (ENV["DEBUG_RESET_MODELS"]) && (ENV["LZMA_DEBUG_DISTANCE"])
86
+ if ENV.fetch("DEBUG_RESET_MODELS",
87
+ nil) && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
86
88
  puts " [DistanceCoder.reset_models] Done resetting"
87
89
  end
88
90
  end
@@ -153,9 +155,11 @@ module Omnizip
153
155
  )
154
156
 
155
157
  # DEBUG: Trace all when LZMA_DEBUG_DISTANCE is set
156
- trace_all = ENV["LZMA_DEBUG_DISTANCE"]
158
+ trace_all = ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
157
159
 
158
- if (trace_325 || trace_large || trace_all) && (ENV["LZMA_DEBUG_DISTANCE"])
160
+ if (trace_325 || trace_large || trace_all) && ENV.fetch(
161
+ "LZMA_DEBUG_DISTANCE", nil
162
+ )
159
163
  puts " [DistanceCoder.decode ##{$distance_decode_count}] START - len_state=#{len_state}"
160
164
  puts " BEFORE: range=#{range_decoder.range.inspect}, code=#{range_decoder.code.inspect}"
161
165
  end
@@ -163,7 +167,9 @@ module Omnizip
163
167
  slot = decode_tree(range_decoder, @slot_encoders[len_state],
164
168
  NUM_DIST_SLOT_BITS)
165
169
 
166
- if (debug_this || trace_large || trace_all) && (ENV["LZMA_DEBUG_DISTANCE"])
170
+ if (debug_this || trace_large || trace_all) && ENV.fetch(
171
+ "LZMA_DEBUG_DISTANCE", nil
172
+ )
167
173
  puts " [DistanceCoder.decode ##{$distance_decode_count}] len_state=#{len_state}, slot=#{slot}"
168
174
  puts " @slot_encoders[#{len_state}] object_id=#{@slot_encoders[len_state].object_id}"
169
175
  end
@@ -172,7 +178,7 @@ module Omnizip
172
178
  if slot < START_POS_MODEL_INDEX
173
179
  # Slots 0-3: No extra bits
174
180
  $distance_decode_count += 1
175
- if debug_this && (ENV["LZMA_DEBUG_DISTANCE"])
181
+ if debug_this && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
176
182
  puts " -> distance=#{slot}"
177
183
  end
178
184
  slot
@@ -187,7 +193,7 @@ module Omnizip
187
193
  base - slot - 1,
188
194
  footer_bits)
189
195
  $distance_decode_count += 1
190
- if debug_this && (ENV["LZMA_DEBUG_DISTANCE"])
196
+ if debug_this && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
191
197
  puts " -> distance=#{result} (slot #{slot})"
192
198
  end
193
199
  else
@@ -213,14 +219,16 @@ module Omnizip
213
219
 
214
220
  # Use decode_direct_bits_with_base to match XZ Utils rc_direct
215
221
  # rc_direct builds on the base value iteratively
216
- result = range_decoder.decode_direct_bits_with_base(num_direct_bits, result)
222
+ result = range_decoder.decode_direct_bits_with_base(
223
+ num_direct_bits, result
224
+ )
217
225
 
218
226
  # Decode low 4 bits using aligned encoder (reverse tree)
219
227
  low_bits = decode_reverse_tree(range_decoder,
220
228
  @align_encoder,
221
229
  0,
222
230
  DIST_ALIGN_BITS)
223
- if trace_326 && (ENV["LZMA_DEBUG_DISTANCE"])
231
+ if trace_326 && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
224
232
  puts " TRACE_326: low_bits=#{low_bits}"
225
233
  end
226
234
 
@@ -228,7 +236,9 @@ module Omnizip
228
236
  # NOTE: slot value is NOT added (XZ Utils pattern - line 513 adds symbol for EOPM check only)
229
237
  result = (result << DIST_ALIGN_BITS) + low_bits
230
238
  $distance_decode_count += 1
231
- if (debug_this || trace_large) && (ENV["LZMA_DEBUG_DISTANCE"])
239
+ if (debug_this || trace_large) && ENV.fetch(
240
+ "LZMA_DEBUG_DISTANCE", nil
241
+ )
232
242
  puts " -> slot=#{slot}, result_after_direct=#{result >> DIST_ALIGN_BITS}, low_bits=#{low_bits}, distance=#{result}"
233
243
  end
234
244
  if result > 100000
@@ -314,10 +324,10 @@ module Omnizip
314
324
  # @return [void]
315
325
  def encode_tree(range_encoder, models, symbol, num_bits)
316
326
  m = 1
317
- trace_all = ENV["TRACE_ALL_SLOT_ENCODE"]
327
+ trace_all = ENV.fetch("TRACE_ALL_SLOT_ENCODE", nil)
318
328
  iteration = 0
319
329
 
320
- if trace_all && (ENV["LZMA_DEBUG_ENCODE"])
330
+ if trace_all && ENV.fetch("LZMA_DEBUG_ENCODE", nil)
321
331
  puts " [encode_tree START] RECEIVED symbol=#{symbol}, num_bits=#{num_bits}"
322
332
  puts " BEFORE: range=#{range_encoder.range}, low=#{range_encoder.low}"
323
333
  end
@@ -325,7 +335,7 @@ module Omnizip
325
335
  (num_bits - 1).downto(0) do |i|
326
336
  iteration += 1
327
337
  bit = (symbol >> i) & 1
328
- if trace_all && (ENV["LZMA_DEBUG_ENCODE"])
338
+ if trace_all && ENV.fetch("LZMA_DEBUG_ENCODE", nil)
329
339
  model_idx = m
330
340
  puts " [#{iteration}/#{num_bits}] i=#{i}, bit=#{bit}, m=#{m}, model_idx=#{model_idx}, prob=#{models[m].probability}"
331
341
  end
@@ -333,7 +343,7 @@ module Omnizip
333
343
  m = (m << 1) | bit
334
344
  end
335
345
 
336
- if trace_all && (ENV["LZMA_DEBUG_ENCODE"])
346
+ if trace_all && ENV.fetch("LZMA_DEBUG_ENCODE", nil)
337
347
  puts " AFTER: range=#{range_encoder.range}, low=#{range_encoder.low}"
338
348
  puts " [encode_tree END] ENCODED symbol=#{symbol}"
339
349
  end
@@ -350,10 +360,10 @@ module Omnizip
350
360
  symbol = 0
351
361
  trace_this = (num_bits == 6 && ENV.fetch("TRACE_SLOT_DECODE",
352
362
  nil)) || ($distance_decode_count == 28)
353
- trace_all = ENV["TRACE_ALL_SLOT_DECODE"]
363
+ trace_all = ENV.fetch("TRACE_ALL_SLOT_DECODE", nil)
354
364
  iteration = 0
355
365
 
356
- if (trace_this || trace_all) && (ENV["LZMA_DEBUG_DISTANCE"])
366
+ if (trace_this || trace_all) && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
357
367
  puts " [decode_tree START] num_bits=#{num_bits}, range=#{range_decoder.range}, code=#{range_decoder.code}"
358
368
  puts " models array object_id=#{models.object_id}"
359
369
  end
@@ -364,11 +374,12 @@ module Omnizip
364
374
  bit = range_decoder.decode_bit(model)
365
375
  m = (m << 1) | bit
366
376
  symbol |= (bit << i)
367
- if (trace_this || trace_all) && (ENV["LZMA_DEBUG_DISTANCE"])
377
+ if (trace_this || trace_all) && ENV.fetch("LZMA_DEBUG_DISTANCE",
378
+ nil)
368
379
  puts " [#{iteration}/#{num_bits}] i=#{i}, bit=#{bit}, m=#{m}, model.object_id=#{model.object_id}, prob=#{model.probability}, symbol=#{symbol}"
369
380
  end
370
381
  end
371
- if (trace_this || trace_all) && (ENV["LZMA_DEBUG_DISTANCE"])
382
+ if (trace_this || trace_all) && ENV.fetch("LZMA_DEBUG_DISTANCE", nil)
372
383
  puts " [decode_tree END] symbol=#{symbol}"
373
384
  end
374
385
  symbol
@@ -76,7 +76,8 @@ module Omnizip
76
76
  XzEncoderAdapter.new(output, options)
77
77
  else
78
78
  # Use SdkEncoder (7-Zip LZMA SDK compatible) - DEFAULT
79
- Implementations::SevenZip::LZMA::Encoder.new(output, options)
79
+ Implementations::SevenZip::LZMA::Encoder.new(output,
80
+ options)
80
81
  end
81
82
  end
82
83
 
@@ -70,7 +70,8 @@ module Omnizip
70
70
  # @param pos_state [Integer] Position state for tree selection
71
71
  # @return [void]
72
72
  def encode(range_encoder, length, pos_state)
73
- trace_encode = ENV.fetch("LZMA_DEBUG_ENCODE", nil) && ENV.fetch("TRACE_LENGTH_CODER", nil)
73
+ trace_encode = ENV.fetch("LZMA_DEBUG_ENCODE",
74
+ nil) && ENV.fetch("TRACE_LENGTH_CODER", nil)
74
75
 
75
76
  if trace_encode
76
77
  puts " [LengthCoder.encode] START: length=#{length}, pos_state=#{pos_state}"
@@ -138,7 +139,8 @@ module Omnizip
138
139
  # @param pos_state [Integer] Position state for tree selection
139
140
  # @return [Integer] Decoded length value (before adding MATCH_LEN_MIN)
140
141
  def decode(range_decoder, pos_state)
141
- trace_decode = ENV.fetch("LZMA_DEBUG_DISTANCE", nil) && ENV.fetch("TRACE_LENGTH_CODER", nil)
142
+ trace_decode = ENV.fetch("LZMA_DEBUG_DISTANCE",
143
+ nil) && ENV.fetch("TRACE_LENGTH_CODER", nil)
142
144
 
143
145
  if trace_decode
144
146
  caller_loc = caller_locations(2, 1).first
@@ -160,7 +162,8 @@ module Omnizip
160
162
  if trace_decode
161
163
  puts " Using LOW tree"
162
164
  end
163
- result = decode_tree(range_decoder, @low[pos_state], NUM_LEN_LOW_BITS)
165
+ result = decode_tree(range_decoder, @low[pos_state],
166
+ NUM_LEN_LOW_BITS)
164
167
  elsif range_decoder.decode_bit(@choice2).zero?
165
168
  # Mid tree
166
169
  if trace_decode
@@ -281,7 +281,8 @@ module Omnizip
281
281
  end
282
282
 
283
283
  result = symbol - 0x100
284
- if trace_233 || (ENV.fetch("TRACE_MATCHED_DECODE", nil) && lit_state == 96)
284
+ if trace_233 || (ENV.fetch("TRACE_MATCHED_DECODE",
285
+ nil) && lit_state == 96)
285
286
  puts "\n FINAL RESULT: 0x#{result.to_s(16).upcase} ('#{result.chr}')"
286
287
  if trace_233
287
288
  puts " Result bits: #{result_bits.join}"
@@ -151,7 +151,7 @@ module Omnizip
151
151
  @input.read(footer_size)
152
152
  @member_size += footer_size
153
153
  else
154
- data_to_crc = decoded_data || +''
154
+ data_to_crc = decoded_data || +""
155
155
  calculated_crc = Omnizip::Checksums::Crc32.calculate(data_to_crc)
156
156
  @uncompressed_size = data_to_crc.bytesize
157
157
 
@@ -211,31 +211,45 @@ module Omnizip
211
211
  # Step 1: Verify magic bytes (SEQ_ID_STRING)
212
212
  # Reference: lzip_decoder.c:104-153
213
213
  magic_bytes = @input.read(4)
214
- raise Omnizip::DecompressionError, "Incomplete .lz header: missing magic bytes" if magic_bytes.nil? || magic_bytes.bytesize < 4
214
+ if magic_bytes.nil? || magic_bytes.bytesize < 4
215
+ raise Omnizip::DecompressionError,
216
+ "Incomplete .lz header: missing magic bytes"
217
+ end
215
218
 
216
219
  4.times do |i|
217
220
  if magic_bytes.getbyte(i) != MAGIC[i]
218
- raise Omnizip::DecompressionError, "Invalid .lz header: magic bytes don't match LZIP (expected #{MAGIC.map { |b| "0x#{b.to_s(16).upcase}" }.join(' ')}, got #{magic_bytes.bytes.map { |b| "0x#{b.to_s(16).upcase}" }.join(' ')})"
221
+ raise Omnizip::DecompressionError, "Invalid .lz header: magic bytes don't match LZIP (expected #{MAGIC.map do |b|
222
+ "0x#{b.to_s(16).upcase}"
223
+ end.join(' ')}, got #{magic_bytes.bytes.map do |b|
224
+ "0x#{b.to_s(16).upcase}"
225
+ end.join(' ')})"
219
226
  end
220
227
  end
221
228
 
222
229
  # Step 2: Read version byte (SEQ_VERSION)
223
230
  # Reference: lzip_decoder.c:156-174
224
231
  version_byte = @input.getbyte
225
- raise Omnizip::DecompressionError, "Incomplete .lz header: missing version byte" if version_byte.nil?
232
+ if version_byte.nil?
233
+ raise Omnizip::DecompressionError,
234
+ "Incomplete .lz header: missing version byte"
235
+ end
226
236
 
227
237
  @version = version_byte
228
238
 
229
239
  # We support version 0 and unextended version 1
230
240
  # Reference: lzip_decoder.c:163-164
231
241
  if @version > 1
232
- raise Omnizip::UnsupportedFormatError, "Unsupported .lz version: #{@version} (only 0 and 1 are supported)"
242
+ raise Omnizip::UnsupportedFormatError,
243
+ "Unsupported .lz version: #{@version} (only 0 and 1 are supported)"
233
244
  end
234
245
 
235
246
  # Step 3: Parse dictionary size (SEQ_DICT_SIZE)
236
247
  # Reference: lzip_decoder.c:177-222
237
248
  dict_size_byte = @input.getbyte
238
- raise Omnizip::DecompressionError, "Incomplete .lz header: missing dictionary size byte" if dict_size_byte.nil?
249
+ if dict_size_byte.nil?
250
+ raise Omnizip::DecompressionError,
251
+ "Incomplete .lz header: missing dictionary size byte"
252
+ end
239
253
 
240
254
  # Decode dictionary size from the encoded byte
241
255
  # The five lowest bits are for the base-2 logarithm of the dictionary size
@@ -247,7 +261,8 @@ module Omnizip
247
261
  # Validate range: [4 KiB, 512 MiB]
248
262
  # Reference: lzip_decoder.c:198-199
249
263
  if b2log < 12 || b2log > 29 || (b2log == 12 && fracnum.positive?)
250
- raise Omnizip::DecompressionError, "Invalid .lz header: dictionary size byte 0x#{dict_size_byte.to_s(16).upcase} is out of valid range"
264
+ raise Omnizip::DecompressionError,
265
+ "Invalid .lz header: dictionary size byte 0x#{dict_size_byte.to_s(16).upcase} is out of valid range"
251
266
  end
252
267
 
253
268
  # Calculate: 2^[b2log] - [fracnum] * 2^([b2log] - 4)
@@ -255,8 +270,14 @@ module Omnizip
255
270
  @dict_size = (1 << b2log) - (fracnum << (b2log - 4))
256
271
 
257
272
  # Sanity checks
258
- raise Omnizip::DecompressionError, "Dictionary size calculation error: too small" if @dict_size < MIN_DICT_SIZE
259
- raise Omnizip::DecompressionError, "Dictionary size calculation error: too large" if @dict_size > MAX_DICT_SIZE
273
+ if @dict_size < MIN_DICT_SIZE
274
+ raise Omnizip::DecompressionError,
275
+ "Dictionary size calculation error: too small"
276
+ end
277
+ if @dict_size > MAX_DICT_SIZE
278
+ raise Omnizip::DecompressionError,
279
+ "Dictionary size calculation error: too large"
280
+ end
260
281
  end
261
282
 
262
283
  # Verify .lz format footer
@@ -273,7 +294,10 @@ module Omnizip
273
294
  def verify_footer(calculated_crc)
274
295
  footer_size = @version.zero? ? LZIP_V0_FOOTER_SIZE : LZIP_V1_FOOTER_SIZE
275
296
  footer = @input.read(footer_size)
276
- raise Omnizip::DecompressionError, "Incomplete .lz footer: expected #{footer_size} bytes, got #{footer&.bytesize || 0}" if footer.nil? || footer.bytesize < footer_size
297
+ if footer.nil? || footer.bytesize < footer_size
298
+ raise Omnizip::DecompressionError,
299
+ "Incomplete .lz footer: expected #{footer_size} bytes, got #{footer&.bytesize || 0}"
300
+ end
277
301
 
278
302
  # Update member_size to include the footer
279
303
  @member_size += footer_size
@@ -284,7 +308,8 @@ module Omnizip
284
308
 
285
309
  # Verify CRC32
286
310
  if calculated_crc != stored_crc
287
- raise Omnizip::ChecksumError, "CRC32 mismatch: calculated 0x#{calculated_crc.to_s(16).upcase}, stored 0x#{stored_crc.to_s(16).upcase}"
311
+ raise Omnizip::ChecksumError,
312
+ "CRC32 mismatch: calculated 0x#{calculated_crc.to_s(16).upcase}, stored 0x#{stored_crc.to_s(16).upcase}"
288
313
  end
289
314
 
290
315
  # Parse and verify uncompressed size (little-endian)
@@ -294,7 +319,8 @@ module Omnizip
294
319
  (footer.getbyte(10) << 48) | (footer.getbyte(11) << 56)
295
320
 
296
321
  if @uncompressed_size != stored_uncompressed_size
297
- raise Omnizip::ChecksumError, "Uncompressed size mismatch: decoded #{@uncompressed_size}, stored #{stored_uncompressed_size}"
322
+ raise Omnizip::ChecksumError,
323
+ "Uncompressed size mismatch: decoded #{@uncompressed_size}, stored #{stored_uncompressed_size}"
298
324
  end
299
325
 
300
326
  # For version 1, verify member size
@@ -305,7 +331,8 @@ module Omnizip
305
331
  (footer.getbyte(18) << 48) | (footer.getbyte(19) << 56)
306
332
 
307
333
  if @member_size != stored_member_size
308
- raise Omnizip::ChecksumError, "Member size mismatch: decoded #{@member_size}, stored #{stored_member_size}"
334
+ raise Omnizip::ChecksumError,
335
+ "Member size mismatch: decoded #{@member_size}, stored #{stored_member_size}"
309
336
  end
310
337
  end
311
338
  end
@@ -84,7 +84,8 @@ module Omnizip
84
84
  prob_before = model.probability if trace_model_updates
85
85
 
86
86
  # DEBUG: Trace is_rep bit decoding
87
- trace_is_rep = ENV.fetch("TRACE_IS_REP_BITS", nil) && (bound > 1_000_000)
87
+ trace_is_rep = ENV.fetch("TRACE_IS_REP_BITS",
88
+ nil) && (bound > 1_000_000)
88
89
 
89
90
  if trace_is_rep
90
91
  puts " [RangeDecoder.decode_bit] BEFORE: range=#{@range}, code=#{@code}, bound=#{bound}, prob=#{model.probability}"
@@ -118,7 +119,8 @@ module Omnizip
118
119
  end
119
120
 
120
121
  # DEBUG: Trace decode_bit for specific problematic state
121
- if ENV.fetch("TRACE_SPECIFIC_DECODE", nil) && @range == 0x40000000 && @code == 0x21407d82
122
+ if ENV.fetch("TRACE_SPECIFIC_DECODE",
123
+ nil) && @range == 0x40000000 && @code == 0x21407d82
122
124
  puts " === CRITICAL DECODE_BIT (MATCHED LITERAL) ==="
123
125
  puts " BEFORE: range=0x#{@range.to_s(16)} (#{@range})"
124
126
  puts " BEFORE: code=0x#{@code.to_s(16)} (#{@code})"
@@ -219,6 +221,38 @@ module Omnizip
219
221
  result
220
222
  end
221
223
 
224
+ # Decode a cumulative frequency value
225
+ #
226
+ # This is used by PPMd for decoding symbols based on their
227
+ # frequency distribution. Returns the cumulative frequency
228
+ # that can be mapped back to a symbol.
229
+ #
230
+ # @param total_freq [Integer] Total frequency of all symbols in context
231
+ # @return [Integer] The cumulative frequency value
232
+ def decode_freq(total_freq)
233
+ normalize
234
+ range_freq = @range / total_freq
235
+ @code / range_freq
236
+ end
237
+
238
+ # Update the decoder state after decoding a symbol with frequency
239
+ #
240
+ # After using decode_freq to get the cumulative frequency,
241
+ # call this to update the range decoder state.
242
+ #
243
+ # @param cum_freq [Integer] Cumulative frequency of decoded symbol
244
+ # @param freq [Integer] Frequency of decoded symbol
245
+ # @param total_freq [Integer] Total frequency of all symbols
246
+ # @return [void]
247
+ def normalize_freq(cum_freq, freq, total_freq)
248
+ range_freq = @range / total_freq
249
+ low_bound = range_freq * cum_freq
250
+ high_bound = range_freq * (cum_freq + freq)
251
+
252
+ @code -= low_bound
253
+ @range = (high_bound - low_bound) & 0xFFFFFFFF
254
+ end
255
+
222
256
  # Decode bits directly using a base value (XZ Utils rc_direct pattern)
223
257
  #
224
258
  # This method implements the XZ Utils rc_direct macro which is used