omnizip 0.3.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,723 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "../../../error"
24
+ require_relative "../../../algorithms/lzma2/constants"
25
+ require_relative "../../../algorithms/lzma2/properties"
26
+ require_relative "../../../algorithms/lzma/decoder"
27
+ require_relative "../../../algorithms/lzma/xz_utils_decoder"
28
+
29
+ module Omnizip
30
+ module Implementations
31
+ module XZUtils
32
+ module LZMA2
33
+ # XZ Utils LZMA2 decoder implementation.
34
+ #
35
+ # This is the original Decoder moved from algorithms/lzma2/decoder.rb
36
+ # to the new namespace structure.
37
+ class Decoder
38
+ include Omnizip::Algorithms::LZMA2Const
39
+
40
+ attr_reader :dict_size
41
+
42
# Build a decoder around a compressed input stream.
#
# In raw mode no property byte is consumed: the dictionary size is taken from
# +options[:dict_size]+ (8 MiB when omitted) and wrapped in an LZMA2
# Properties object. Otherwise the property byte is read from +input+
# immediately via +read_property_byte+.
#
# @param input [IO] Input stream of compressed data
# @param options [Hash] Decoding options
# @option options [Boolean] :raw_mode If true, skip property byte reading (for XZ format)
# @option options [Integer] :dict_size Dictionary size to use in raw_mode
def initialize(input, options = {})
  @input = input
  @options = options
  @raw_mode = options[:raw_mode] || false

  unless @raw_mode
    # Standalone LZMA2: the dictionary size is encoded in the leading byte.
    read_property_byte
    return
  end

  # Raw mode (XZ container): the caller supplies the dictionary size.
  @dict_size = options[:dict_size] || (8 * 1024 * 1024)
  @properties = Omnizip::Algorithms::LZMA2::Properties.new(@dict_size)
end
62
+
63
# Decode a complete LZMA2 stream.
#
# Reads control bytes until the end marker, validates the dictionary-reset
# and properties requirements of every chunk, decodes the chunk through
# +decode_chunk+ and concatenates the results.
#
# XZ Utils pattern (lzma2_decoder.c):
# - The LZMA decoder is created once per stream and reused across chunks
# - State (dictionary, probability models) persists between chunks
# - The first chunk must reset the dictionary (control >= 0xE0 or
#   control == CONTROL_UNCOMPRESSED_RESET)
# - The first LZMA chunk after a dictionary reset must carry new properties
#   (control >= 0xC0)
#
# Every successfully decoded chunk contributes its data to the output,
# including uncompressed chunks that reset the dictionary.
#
# @return [String] Decompressed data (ASCII-8BIT)
# @raise [Omnizip::FormatError] If a required dictionary reset or
#   properties byte is missing
# @raise [Omnizip::IOError] If the stream ends before the end marker
def decode_stream
  debug = ENV["LZMA2_DEBUG"]
  output = []

  warn "DEBUG: decode_stream - starting..." if debug

  # Per-stream state. The uncompressed buffer is cleared together with the
  # decoder so data buffered by an earlier call is never preloaded into
  # the dictionary of this stream.
  @lzma_decoder = nil
  @uncompressed_buffer = nil
  @need_properties = true # First LZMA chunk needs properties
  @need_dictionary_reset = true # First chunk must reset dictionary

  chunk_num = 0
  loop do
    control = read_control_byte

    if debug
      warn "DEBUG: decode_stream - chunk ##{chunk_num}, control=0x#{control.to_s(16)}"
    end

    break if control == CONTROL_END

    if control >= 0xE0 || control == CONTROL_UNCOMPRESSED_RESET
      # This chunk resets the dictionary: the pending-reset requirement is
      # satisfied, and the next LZMA chunk must bring its own properties.
      @need_properties = true
      @need_dictionary_reset = false
    elsif @need_dictionary_reset
      raise Omnizip::FormatError,
            "LZMA2 dictionary reset required but not performed (control=0x#{control.to_s(16).upcase})"
    end

    if control >= 0x80
      if control >= 0xC0
        # New properties are part of this chunk header.
        @need_properties = false
      elsif @need_properties
        raise Omnizip::FormatError,
              "LZMA2 properties required but not provided (control=0x#{control.to_s(16).upcase})"
      end
    end

    chunk_data = decode_chunk(control, chunk_num)

    if debug
      warn "DEBUG: decode_stream - chunk ##{chunk_num} produced #{chunk_data.bytesize} bytes"
    end

    output << chunk_data
    chunk_num += 1
  end

  if debug
    total_size = output.sum(&:bytesize)
    warn "DEBUG: decode_stream - finished, total chunks=#{chunk_num}, total_size=#{total_size}"
  end

  output.join.force_encoding("ASCII-8BIT")
end
164
+
165
+ private
166
+
167
# Consume the LZMA2 property byte from the input and derive the
# dictionary size from it.
#
# Sets @properties and @dict_size.
#
# @return [void]
# @raise [Omnizip::FormatError] If the input is exhausted before the
#   property byte can be read
def read_property_byte
  header_byte = @input.getbyte
  if header_byte.nil?
    raise Omnizip::FormatError, "Invalid LZMA2 header"
  end

  @properties = Omnizip::Algorithms::LZMA2::Properties.from_byte(header_byte)
  @dict_size = @properties.actual_dict_size
end
178
+
179
# Fetch the next LZMA2 control byte from the input.
#
# @return [Integer] Control byte value
# @raise [Omnizip::IOError] If the input is exhausted
def read_control_byte
  control = @input.getbyte
  return control unless control.nil?

  raise Omnizip::IOError, "Unexpected end of stream"
end
189
+
190
# Dispatch one chunk to the matching decoder based on its control byte.
#
# Control byte ranges (XZ Utils lzma2_decoder.c):
# - 0x01 / 0x02: uncompressed chunk
# - 0x80..0xFF: LZMA chunk (reset and properties semantics depend on the
#   upper bits and are handled further down the call chain)
# - 0x03..0x7F: invalid
#
# @param control [Integer] Control byte
# @param chunk_num [Integer] Chunk sequence number
# @return [String] Decoded chunk data
# @raise [Omnizip::FormatError] If the control byte is in 0x03..0x7F
def decode_chunk(control, chunk_num)
  if ENV["LZMA2_DEBUG"]
    pos = @input.respond_to?(:pos) ? @input.pos : "N/A"
    warn "DEBUG: decode_chunk - chunk=#{chunk_num}, control=0x#{control.to_s(16)}, pos=#{pos}"
  end

  if control.between?(3, 0x7F)
    raise Omnizip::FormatError,
          "Invalid LZMA2 control byte: 0x#{control.to_s(16).upcase} " \
          "(valid ranges: 0x00-0x02, 0x80-0xFF)"
  end

  return decode_uncompressed_chunk(control) if uncompressed_chunk?(control)

  decode_compressed_chunk(control, chunk_num)
end
224
+
225
# Tell whether a control byte denotes an uncompressed chunk, with or
# without dictionary reset.
#
# @param control [Integer] Control byte
# @return [Boolean] True if uncompressed
def uncompressed_chunk?(control)
  control == CONTROL_UNCOMPRESSED_RESET || control == CONTROL_UNCOMPRESSED
end
233
+
234
# Read one uncompressed chunk and make its bytes available as dictionary
# history for later LZMA chunks.
#
# The chunk header holds a 2-byte big-endian (size - 1) field followed by
# that many raw bytes. When an LZMA decoder already exists the bytes are
# handed to it through +add_to_dictionary+; otherwise they are appended to
# @uncompressed_buffer so they can be passed along once a decoder is
# created.
#
# @param _control [Integer] Control byte (unused)
# @return [String] Uncompressed data
# @raise [Omnizip::IOError] If the input ends before the whole chunk is read
def decode_uncompressed_chunk(_control)
  debug = ENV["LZMA2_DEBUG"]
  input_position = -> { @input.respond_to?(:pos) ? @input.pos : "N/A" }

  # Stored value is (size - 1), two bytes, big-endian.
  size = read_size_bytes(2) + 1

  if debug
    pos_before = input_position.call
    warn "DEBUG: decode_uncompressed_chunk - size=#{size}, pos_before=#{pos_before}"
  end

  data = @input.read(size)

  if debug
    pos_after = input_position.call
    actual_size = data&.bytesize || 0
    warn "DEBUG: decode_uncompressed_chunk - expected=#{size}, actual=#{actual_size}, pos_after=#{pos_after}"
    warn "DEBUG: decode_uncompressed_chunk - data_nil=#{data.nil?}"
  end

  unless data && data.bytesize == size
    raise Omnizip::IOError,
          "Unexpected end of stream"
  end

  if @lzma_decoder.nil? || @lzma_decoder == false
    # No decoder yet: keep the bytes until the first compressed chunk.
    @uncompressed_buffer ||= String.new(encoding: "ASCII-8BIT")
    @uncompressed_buffer << data
    if debug
      warn "DEBUG: decode_uncompressed_chunk - Stored #{data.bytesize} bytes in uncompressed_buffer (total #{@uncompressed_buffer.bytesize} bytes)"
    end
  else
    @lzma_decoder.add_to_dictionary(data)
    if debug
      warn "DEBUG: decode_uncompressed_chunk - Added #{data.bytesize} bytes to LZMA decoder's dictionary"
    end
  end

  data
end
291
+
292
# Parse the header of an LZMA chunk, read its payload and decompress it.
#
# Chunk header layout for control >= 0x80 (XZ Utils lzma2_decoder.c):
# - bits 4-0 of the control byte: high 5 bits of (uncompressed_size - 1)
# - 2 bytes, big-endian: low 16 bits of (uncompressed_size - 1)
# - 2 bytes, big-endian: (compressed_size - 1)
# - 1 properties byte (lc/lp/pb), present only when control >= 0xC0
# - compressed_size bytes of LZMA data
#
# When no properties byte is present, +nil+ is passed on to
# +decompress_lzma_chunk+.
#
# @param control [Integer] Control byte
# @param chunk_num [Integer] Chunk sequence number
# @return [String] Decompressed data
# @raise [Omnizip::FormatError] If control < 0x80 (not an LZMA chunk)
# @raise [Omnizip::IOError] If the input ends inside the header or payload
def decode_compressed_chunk(control, chunk_num)
  # Defensive guard: decode_chunk already rejects 0x03-0x7F and routes
  # 0x01/0x02 to decode_uncompressed_chunk.
  if control < 0x80
    raise Omnizip::FormatError,
          "Invalid LZMA2 control byte: 0x#{control.to_s(16).upcase} " \
          "(control < 0x80 but not 0x01 or 0x02)"
  end

  debug = ENV["LZMA2_DEBUG"]

  # read_size_bytes raises Omnizip::IOError on a truncated header, so a
  # short stream never surfaces as NoMethodError on nil.
  uncompressed_size = ((control & 0x1F) << 16) + read_size_bytes(2) + 1
  compressed_size = read_size_bytes(2) + 1

  properties = nil
  if control >= 0xC0
    properties = @input.getbyte
    if properties.nil?
      raise Omnizip::IOError,
            "Unexpected end of stream"
    end
  end

  if debug
    warn "DEBUG: decode_compressed_chunk - control=0x#{control.to_s(16)}"
    warn " uncompressed_size: #{uncompressed_size}"
    warn " compressed_size: #{compressed_size}"
    warn " properties: #{properties&.to_s(16)}"
    pos_before = @input.respond_to?(:pos) ? @input.pos : "N/A"
    warn "DEBUG: decode_compressed_chunk - uncompressed=#{uncompressed_size}, compressed=#{compressed_size}, properties=#{properties&.to_s(16)}, pos_before=#{pos_before}"
    warn "DEBUG: @input.respond_to?(:pos)=#{@input.respond_to?(:pos)}, @input.class=#{@input.class}"
  end

  compressed_data = @input.read(compressed_size)
  actual_size = compressed_data&.bytesize || 0

  if debug
    warn "DEBUG: decode_compressed_chunk - expected=#{compressed_size}, actual=#{actual_size}"
    # to_s keeps the dump safe when the read returned nil at end of input.
    hex_dump = compressed_data.to_s.bytes.map { |b| "0x#{b.to_s(16).rjust(2, '0')}" }.join(" ")
    warn "DEBUG: compressed_data hex: #{hex_dump}"
  end

  if compressed_data.nil? || actual_size != compressed_size
    if debug
      warn "DEBUG: decode_compressed_chunk - FAILED - expected=#{compressed_size}, actual=#{actual_size}"
    end
    raise Omnizip::IOError, "Unexpected end of stream"
  end

  decompress_lzma_chunk(compressed_data, uncompressed_size, properties,
                        control, chunk_num)
end
397
+
398
# Decompress the payload of one LZMA chunk with the shared LZMA decoder.
#
# XZ Utils pattern (lzma2_decoder.c):
# - The decoder is created for chunk 0 or when none exists yet; bytes
#   collected in @uncompressed_buffer are handed over as preloaded
#   dictionary data and the buffer is then cleared
# - control >= 0xC0: reset the decoder with the new lc/lp/pb
# - 0xA0 <= control < 0xC0: reset the decoder state only
# - 0x80 <= control < 0xA0: no reset, just feed the new input
# - The dictionary is preserved unless control >= 0xE0 or control == 1
#   (and never for chunk 0)
#
# Diagnostic output is written to stderr and only when LZMA2_DEBUG is set.
#
# @param compressed_data [String] Compressed data (no LZMA header)
# @param expected_size [Integer, nil] Expected decompressed size from the
#   chunk header; nil means unknown (0xFFFFFFFFFFFFFFFF is passed on)
# @param properties [Integer, nil] LZMA properties byte (if present)
# @param control [Integer] LZMA2 control byte for this chunk
# @param chunk_num [Integer] Chunk sequence number
# @return [String] Decompressed data
# @raise [Omnizip::DecompressionError] If the decoded size differs from
#   the expected size
def decompress_lzma_chunk(compressed_data, expected_size, properties,
                          control, chunk_num)
  debug = ENV["LZMA2_DEBUG"]

  if debug
    warn "DEBUG: decompress_lzma_chunk - expected_size=#{expected_size}, compressed_size=#{compressed_data.bytesize}, properties=#{properties&.to_s(16)}"
    warn "DEBUG: @expected_uncompressed_size=#{@expected_uncompressed_size}" if defined?(@expected_uncompressed_size)
  end

  lzma_uncompressed_size = expected_size || 0xFFFFFFFFFFFFFFFF

  # Properties byte encodes (pb * 9 * 5) + (lp * 9) + lc.
  if properties && properties >= 0
    pb, remainder = properties.divmod(9 * 5)
    lp, lc = remainder.divmod(9)
  else
    # Defaults used when the chunk carries no properties byte.
    lc = 3
    lp = 0
    pb = 2
  end

  if debug
    warn "DEBUG: decompress_lzma_chunk - lc=#{lc}, lp=#{lp}, pb=#{pb}, properties=#{properties&.to_s(16)}"
  end

  # Raw LZMA data, no LZMA header.
  input_buffer = StringIO.new(compressed_data)
  input_buffer.set_encoding("ASCII-8BIT")

  # Only control >= 0xE0 (or the uncompressed-reset value 1) resets the
  # dictionary; 0xC0-0xDF and 0xA0-0xBF reset state but keep the dictionary.
  preserve_dict = !(control >= 0xE0 || control == 1)

  if chunk_num.zero? || !@lzma_decoder
    if debug
      warn "DEBUG: input_buffer created, pos=#{input_buffer.pos}, size=#{compressed_data.bytesize}"
      first_bytes = compressed_data[0..20].bytes.map { |b| b.to_s(16).rjust(2, '0') }.join(" ")
      warn "DEBUG: compressed_data bytes (first 20): #{first_bytes}"
    end

    # Uncompressed chunks seen before the decoder existed become
    # preloaded dictionary content.
    preloaded_data = @uncompressed_buffer if @uncompressed_buffer && !@uncompressed_buffer.empty?

    @lzma_decoder = Omnizip::Algorithms::XzUtilsDecoder.new(input_buffer,
                                                            lzma2_mode: true,
                                                            lc: lc,
                                                            lp: lp,
                                                            pb: pb,
                                                            dict_size: @dict_size,
                                                            uncompressed_size: lzma_uncompressed_size,
                                                            preloaded_data: preloaded_data)

    @uncompressed_buffer = nil if preloaded_data

    if debug
      warn "DEBUG: decompress_lzma_chunk - Created new LZMA decoder (lzma2_mode)#{" with #{preloaded_data.bytesize} bytes of preloaded data" if preloaded_data}"
    end
  else
    state_reset_only = control >= 0xA0 && control < 0xC0

    if control >= 0xC0
      @lzma_decoder.reset(new_lc: lc, new_lp: lp, new_pb: pb,
                          preserve_dict: preserve_dict)
    elsif state_reset_only
      if debug
        decoder_dict_full = @lzma_decoder.instance_variable_get(:@dict_full)
        warn "DEBUG: decompress_lzma_chunk - Calling reset with preserved dict (control=#{control}, dict_full=#{decoder_dict_full})"
      end
      @lzma_decoder.reset(preserve_dict: preserve_dict)
    end

    @lzma_decoder.set_input(input_buffer)

    if debug && control >= 0xC0
      warn "DEBUG: decompress_lzma_chunk - Reset LZMA decoder with new properties (preserve_dict=#{preserve_dict})"
    elsif debug && state_reset_only
      warn "DEBUG: decompress_lzma_chunk - After set_input, checking range_decoder..."
      if @lzma_decoder.instance_variable_defined?(:@range_decoder)
        range_decoder = @lzma_decoder.instance_variable_get(:@range_decoder)
        if range_decoder
          warn " range_decoder exists: code=0x#{range_decoder.instance_variable_get(:@code).to_s(16)}, range=0x#{range_decoder.instance_variable_get(:@range).to_s(16)}, init_bytes_remaining=#{range_decoder.instance_variable_get(:@init_bytes_remaining)}"
        else
          warn " range_decoder is nil"
        end
      else
        warn " @range_decoder not defined yet"
      end
    end

    # The uncompressed size is set anew for every chunk.
    @lzma_decoder.set_uncompressed_size(lzma_uncompressed_size,
                                        allow_eopm: false)

    if debug
      warn "DEBUG: decompress_lzma_chunk - Reusing LZMA decoder, set uncompressed_size=#{lzma_uncompressed_size}"
    end
  end

  # Chunk 0 always starts from a fresh dictionary.
  preserve_dictionary = chunk_num.zero? ? false : preserve_dict

  decompressed = @lzma_decoder.decode_stream(nil,
                                             preserve_dict: preserve_dictionary,
                                             check_rc_finished: false)

  if debug
    warn "DEBUG: decompress_lzma_chunk - expected=#{lzma_uncompressed_size}, got=#{decompressed.bytesize}"
  end

  if decompressed.bytesize != lzma_uncompressed_size
    if debug
      warn "DEBUG: Size mismatch - decompressed=#{decompressed.bytesize}, expected=#{lzma_uncompressed_size}"
    end
    raise Omnizip::DecompressionError, "Decompressed size mismatch: expected #{lzma_uncompressed_size}, " \
                                       "got #{decompressed.bytesize}"
  end

  decompressed
end
607
+
608
# Assemble a 13-byte LZMA header: properties byte, dictionary size
# (32-bit little-endian, taken from @dict_size) and uncompressed size
# (64-bit little-endian).
#
# The properties byte encodes (pb * 9 * 5) + (lp * 9) + lc. It is split
# into its parts and recombined, so a non-negative +properties+ value ends
# up unchanged in the header (truncated to one byte by the packing).
#
# NOTE(review): without properties this method falls back to
# lc=0, lp=0, pb=0, whereas decompress_lzma_chunk falls back to
# lc=3, lp=0, pb=2 - confirm which default is intended.
#
# @param uncompressed_size [Integer] Expected size after decompression
# @param properties [Integer, nil] LZMA properties byte (lc/lp/pb encoding)
# @return [String] LZMA header (13 bytes, ASCII-8BIT)
def build_lzma_header(uncompressed_size, properties = nil)
  debug = ENV["LZMA2_DEBUG"]

  if properties && properties >= 0
    pb, rest = properties.divmod(9 * 5)
    lp, lc = rest.divmod(9)

    if debug
      recombined = lc + (lp * 9) + (pb * 9 * 5)
      warn "DEBUG: build_lzma_header - properties=0x#{properties.to_s(16)} -> lc=#{lc}, lp=#{lp}, pb=#{pb}, props=0x#{recombined.to_s(16)}"
    end
  else
    lc = lp = pb = 0

    if debug
      warn "DEBUG: build_lzma_header - no properties, using defaults lc=0, lp=0, pb=0"
    end
  end

  props = lc + (lp * 9) + (pb * 9 * 5)

  # C = props byte, V = 32-bit LE dictionary size, Q< = 64-bit LE size.
  [props, @dict_size, uncompressed_size].pack("CVQ<")
end
658
+
659
# Read an unsigned big-endian integer of +num_bytes+ bytes from the input.
#
# @param num_bytes [Integer] Number of bytes to read
# @return [Integer] Size value (0 when num_bytes is 0)
# @raise [Omnizip::IOError] If the input ends before all bytes are read
def read_size_bytes(num_bytes)
  num_bytes.times.reduce(0) do |accumulated, _index|
    next_byte = @input.getbyte
    raise Omnizip::IOError, "Unexpected end of stream" if next_byte.nil?

    # Most significant byte comes first.
    (accumulated << 8) | next_byte
  end
end
673
+
674
# Create @lzma_decoder with default properties (lc=3, lp=0, pb=2) unless
# one already exists, then seed its internal state by hand because
# decode_stream is not called on it here.
#
# The decoder's dictionary buffer, position, fill level, wrap flag, rep
# distances and state machine are written directly through
# instance_variable_set.
#
# @return [nil]
def ensure_lzma_decoder_exists
  return if @lzma_decoder

  debug = ENV["LZMA2_DEBUG"]

  if debug
    warn "DEBUG: ensure_lzma_decoder_exists - Creating LZMA decoder for uncompressed chunk"
  end

  decoder = Omnizip::Algorithms::XzUtilsDecoder.new(
    StringIO.new(""), # no compressed input yet
    lzma2_mode: true,
    lc: 3,
    lp: 0,
    pb: 2,
    dict_size: @dict_size,
    uncompressed_size: 0xFFFFFFFFFFFFFFFF, # unknown size
  )
  @lzma_decoder = decoder

  # Dictionary buffer: dict_size plus the initial position offset.
  dict_buf_size = @dict_size + Omnizip::Algorithms::LZMA::XzUtilsDecoder::LZ_DICT_INIT_POS
  decoder.instance_variable_set(:@dict_buf, Array.new(dict_buf_size, 0))
  decoder.instance_variable_set(:@pos, Omnizip::Algorithms::LZMA::XzUtilsDecoder::LZ_DICT_INIT_POS)
  decoder.instance_variable_set(:@dict_full, 0)
  decoder.instance_variable_set(:@has_wrapped, false)

  # Rep distances all start at zero.
  %i[@rep0 @rep1 @rep2 @rep3].each do |rep|
    decoder.instance_variable_set(rep, 0)
  end

  decoder.instance_variable_set(:@state, Omnizip::Algorithms::LZMA::SdkStateMachine.new)

  if debug
    warn "DEBUG: ensure_lzma_decoder_exists - Created LZMA decoder with lc=3, lp=0, pb=2, dict_size=#{@dict_size}"
    warn "DEBUG: ensure_lzma_decoder_exists - Initialized dict_buf_size=#{dict_buf_size}, pos=#{Omnizip::Algorithms::LZMA::XzUtilsDecoder::LZ_DICT_INIT_POS}"
  end
end
719
+ end
720
+ end
721
+ end
722
+ end
723
+ end