omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,260 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "constants"
24
+ require_relative "bit_model"
25
+
26
+ module Omnizip
27
+ module Algorithms
28
+ class LZMA < Algorithm
29
+ # SDK-compatible length encoder/decoder
30
+ #
31
+ # This class implements the LZMA SDK's length encoding scheme:
32
+ # - Lengths 0-7: choice=0, 3 bits from low tree
33
+ # - Lengths 8-15: choice=1, choice2=0, 3 bits from mid tree
34
+ # - Lengths 16+: choice=1, choice2=1, 8 bits from high tree
35
+ #
36
+ # Position state is used to select which low/mid tree to use,
37
+ # providing context-dependent compression.
38
+ class LengthCoder
39
+ include Constants
40
+
41
# Build the probability models used by the length coder.
#
# Creates the two choice models, one low tree and one mid tree per
# position state, and a single high tree shared by all position states.
# Every tree array is allocated with 1 << (num_bits + 1) models.
#
# @param num_pos_states [Integer] Number of position states (1 << pb)
def initialize(num_pos_states)
  @num_pos_states = num_pos_states
  @choice = BitModel.new
  @choice2 = BitModel.new

  low_tree_size = 1 << (NUM_LEN_LOW_BITS + 1)
  mid_tree_size = 1 << (NUM_LEN_MID_BITS + 1)
  high_tree_size = 1 << (NUM_LEN_HIGH_BITS + 1)

  # Per-position-state trees for the two short length bands
  @low = Array.new(num_pos_states) { Array.new(low_tree_size) { BitModel.new } }
  @mid = Array.new(num_pos_states) { Array.new(mid_tree_size) { BitModel.new } }

  # One tree for the long band, independent of position state
  @high = Array.new(high_tree_size) { BitModel.new }
end
65
+
66
# Encode a match length into the range coder.
#
# The length falls into one of three bands, each with its own prefix bits
# and bit tree:
# - below LEN_LOW_SYMBOLS: choice=0, then the low tree for pos_state
# - below LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS: choice=1, choice2=0, then the
#   mid tree for pos_state (symbol reduced by LEN_LOW_SYMBOLS)
# - anything else: choice=1, choice2=1, then the shared high tree (symbol
#   reduced by both band sizes)
#
# When both LZMA_DEBUG_ENCODE and TRACE_LENGTH_CODER are set in the
# environment, every step is traced with puts.
#
# @param range_encoder [RangeEncoder] The range encoder
# @param length [Integer] Length value (already subtracted MATCH_LEN_MIN)
# @param pos_state [Integer] Position state for tree selection
# @return [void]
def encode(range_encoder, length, pos_state)
  tracing = ENV.fetch("LZMA_DEBUG_ENCODE", nil) && ENV.fetch("TRACE_LENGTH_CODER", nil)
  mid_limit = LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS

  if tracing
    puts " [LengthCoder.encode] START: length=#{length}, pos_state=#{pos_state}",
         " @choice.prob=#{@choice.probability} @choice2.prob=#{@choice2.probability}"
  end

  if length >= mid_limit
    # Long band: both choice bits are 1, symbol goes to the shared tree
    if tracing
      puts " Using HIGH tree (length #{length} >= #{LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS})",
           " Encoding choice=1 with prob=#{@choice.probability}"
    end
    range_encoder.encode_bit(@choice, 1)
    if tracing
      puts " After choice: @choice.prob=#{@choice.probability}",
           " Encoding choice2=1 with prob=#{@choice2.probability}"
    end
    range_encoder.encode_bit(@choice2, 1)
    puts " After choice2: @choice2.prob=#{@choice2.probability}" if tracing
    encode_tree(range_encoder, @high, length - mid_limit, NUM_LEN_HIGH_BITS)
  elsif length >= LEN_LOW_SYMBOLS
    # Middle band: choice=1, choice2=0, symbol goes to the mid tree
    if tracing
      puts " Using MID tree (length #{length} < #{LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS})",
           " Encoding choice=1 with prob=#{@choice.probability}"
    end
    range_encoder.encode_bit(@choice, 1)
    if tracing
      puts " After choice: @choice.prob=#{@choice.probability}",
           " Encoding choice2=0 with prob=#{@choice2.probability}"
    end
    range_encoder.encode_bit(@choice2, 0)
    puts " After choice2: @choice2.prob=#{@choice2.probability}" if tracing
    encode_tree(range_encoder, @mid[pos_state], length - LEN_LOW_SYMBOLS, NUM_LEN_MID_BITS)
  else
    # Short band: a single 0 choice bit, symbol goes to the low tree
    if tracing
      puts " Using LOW tree (length #{length} < #{LEN_LOW_SYMBOLS})",
           " Encoding choice=0 with prob=#{@choice.probability}"
    end
    range_encoder.encode_bit(@choice, 0)
    puts " After choice: @choice.prob=#{@choice.probability}" if tracing
    encode_tree(range_encoder, @low[pos_state], length, NUM_LEN_LOW_BITS)
  end

  return unless tracing

  puts " FINAL @choice.prob=#{@choice.probability} @choice2.prob=#{@choice2.probability}",
       " [LengthCoder.encode] END"
end
134
+
135
# Decode a match length from the range coder.
#
# Reads the choice bit; a 0 selects the low tree for pos_state. Otherwise
# the choice2 bit is read: 0 selects the mid tree for pos_state (result
# offset by LEN_LOW_SYMBOLS), 1 selects the shared high tree (result
# offset by LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS).
#
# When both LZMA_DEBUG_DISTANCE and TRACE_LENGTH_CODER are set in the
# environment, every step is traced with puts.
#
# @param range_decoder [RangeDecoder] The range decoder
# @param pos_state [Integer] Position state for tree selection
# @return [Integer] Decoded length value (before adding MATCH_LEN_MIN)
def decode(range_decoder, pos_state)
  trace_decode = ENV.fetch("LZMA_DEBUG_DISTANCE", nil) && ENV.fetch("TRACE_LENGTH_CODER", nil)

  if trace_decode
    # The frame two levels up does not exist when decode is invoked near
    # the top of the stack, so the lookup has to tolerate a missing entry
    # instead of raising NoMethodError from inside a debug trace.
    caller_loc = caller_locations(2, 1)&.first
    origin = caller_loc ? "#{caller_loc.label} at #{caller_loc.lineno}" : "(unknown)"
    puts " [LengthCoder.decode] START: pos_state=#{pos_state}"
    puts " self.object_id=#{object_id}"
    puts " @choice.object_id=#{@choice.object_id} prob=#{@choice.probability}"
    puts " @choice2.object_id=#{@choice2.object_id} prob=#{@choice2.probability}"
    puts " Called from: #{origin}"
  end

  choice_bit = range_decoder.decode_bit(@choice)
  if trace_decode
    puts " Decoded choice=#{choice_bit} with prob=#{@choice.probability}"
    puts " After choice decode: @choice.prob=#{@choice.probability}"
  end

  if choice_bit.zero?
    # Low tree
    puts " Using LOW tree" if trace_decode
    result = decode_tree(range_decoder, @low[pos_state], NUM_LEN_LOW_BITS)
  elsif range_decoder.decode_bit(@choice2).zero?
    # Mid tree
    if trace_decode
      puts " Decoded choice2=0 with prob=#{@choice2.probability}"
      puts " After choice2 decode: @choice2.prob=#{@choice2.probability}"
      puts " Using MID tree"
    end
    result = LEN_LOW_SYMBOLS +
             decode_tree(range_decoder, @mid[pos_state], NUM_LEN_MID_BITS)
  else
    # High tree
    if trace_decode
      puts " Decoded choice2=1 with prob=#{@choice2.probability}"
      puts " After choice2 decode: @choice2.prob=#{@choice2.probability}"
      puts " Using HIGH tree"
    end
    result = LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS +
             decode_tree(range_decoder, @high, NUM_LEN_HIGH_BITS)
  end

  if trace_decode
    puts " FINAL @choice.prob=#{@choice.probability} @choice2.prob=#{@choice2.probability}"
    puts " Result: length_encoded=#{result}"
    puts " [LengthCoder.decode] END"
  end

  result
end
192
+
193
# Reset every probability model owned by this coder.
#
# Calls reset on both choice models, on every model of every low and mid
# tree, and on every model of the high tree. According to the original
# author's note this is meant to run on a state reset (control >= 0xA0)
# to match XZ Utils; that caller is not visible from this class.
#
# When TRACE_RESET_MODELS is set in the environment, the choice
# probabilities before and after the reset are traced with puts.
#
# @return [void]
def reset_models
  tracing = ENV["TRACE_RESET_MODELS"]

  if tracing
    puts " [LengthCoder.reset_models] CALLED!"
    puts " Before reset: @choice.prob=#{@choice.probability} @choice2.prob=#{@choice2.probability}"
    # The frame two levels up may not exist (e.g. when called directly
    # from a script), so tolerate a missing entry rather than crashing.
    caller_loc = caller_locations(2, 1)&.first
    origin = caller_loc ? "#{caller_loc.label} at #{caller_loc.path}:#{caller_loc.lineno}" : "(unknown)"
    puts " Called from: #{origin}"
  end

  @choice.reset
  @choice2.reset
  @low.each { |state_models| state_models.each(&:reset) }
  @mid.each { |state_models| state_models.each(&:reset) }
  @high.each(&:reset)

  return unless tracing

  puts " After reset: @choice.prob=#{@choice.probability} @choice2.prob=#{@choice2.probability}"
end
222
+
223
+ private
224
+
225
# Write a symbol through a binary tree of bit models, most significant
# bit first.
#
# The walk starts at node 1; after each bit the node index becomes
# (node << 1) | bit, so the model used for a bit depends on all the bits
# encoded before it.
#
# @param range_encoder [RangeEncoder] The range encoder
# @param models [Array<BitModel>] Array of bit models for the tree
# @param symbol [Integer] Symbol to encode
# @param num_bits [Integer] Number of bits in the tree
# @return [void]
def encode_tree(range_encoder, models, symbol, num_bits)
  top_bit = num_bits - 1
  top_bit.downto(0).inject(1) do |node, shift|
    bit = (symbol >> shift) & 1
    range_encoder.encode_bit(models[node], bit)
    (node << 1) | bit
  end
  top_bit
end
240
+
241
# Read a symbol through a binary tree of bit models, most significant
# bit first.
#
# The node index starts at 1 and absorbs each decoded bit as its new low
# bit, so after num_bits steps it equals (1 << num_bits) + symbol.
#
# @param range_decoder [RangeDecoder] The range decoder
# @param models [Array<BitModel>] Array of bit models for the tree
# @param num_bits [Integer] Number of bits in the tree
# @return [Integer] Decoded symbol
def decode_tree(range_decoder, models, num_bits)
  node = 1
  num_bits.times do
    node = (node << 1) | range_decoder.decode_bit(models[node])
  end
  # Drop the sentinel bit that marked the tree root
  node - (1 << num_bits)
end
257
+ end
258
+ end
259
+ end
260
+ end
@@ -0,0 +1,320 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "constants"
24
+
25
+ module Omnizip
26
+ module Algorithms
27
+ class LZMA < Algorithm
28
+ # Literal byte decoder
29
+ #
30
+ # This class is responsible for decoding literal bytes using
31
+ # probability models. It supports two modes matching the encoder:
32
+ #
33
+ # 1. Unmatched mode: Simple 8-bit decoding
34
+ # 2. Matched mode: Uses match byte for context (SDK feature)
35
+ #
36
+ # The decoder must perfectly mirror the encoder's decisions
37
+ # about which probability models to use.
38
+ #
39
+ # Single Responsibility: Literal byte decoding only
40
+ #
41
+ # @example Unmatched decoding
42
+ # decoder = LiteralDecoder.new
43
+ # byte = decoder.decode_unmatched(lit_state, lc, range_decoder, models)
44
+ #
45
+ # @example Matched decoding (SDK mode)
46
+ # decoder = LiteralDecoder.new
47
+ # byte = decoder.decode_matched(match_byte, lit_state, lc, range_decoder, models)
48
+ class LiteralDecoder
49
+ include Constants
50
+
51
# Decode literal byte in unmatched mode
#
# Each of the 8 bits is decoded with the probability model selected by
# the partial symbol built so far. The symbol starts at 1 and absorbs one
# decoded bit per step until it reaches the 0x100-0x1FF range; the byte
# is the symbol minus 0x100.
#
# The models for this literal start at 3 * (lit_state << lc). The shift
# is applied before the multiplication, following the XZ Utils
# literal_subcoder formula quoted by the original author.
#
# Debug tracing (puts) is controlled by the TRACE_LITERAL_DECODE and
# LZMA_DEBUG_BITS environment variables and only fires for lit_state 0
# or 96. It reads @range and @code from the range decoder via
# instance_variable_get.
#
# @param lit_state [Integer] Literal context value (unshifted)
# @param lc [Integer] Literal context bits, used as the shift amount
# @param range_decoder [RangeDecoder] Range decoder instance
# @param models [Array<BitModel>] Literal probability models
# @return [Integer] Decoded byte value (0-255)
def decode_unmatched(lit_state, lc, range_decoder, models)
  base_offset = 3 * (lit_state << lc)

  # Resolve the debug switches once: the bit loop below runs for every
  # literal and should not query the environment on each iteration.
  trace_first = ENV["TRACE_LITERAL_DECODE"] && lit_state.zero?
  debug_bits = ENV["LZMA_DEBUG_BITS"]
  trace_first_bits = trace_first && debug_bits
  trace_96_bits = debug_bits && lit_state == 96

  symbol = 1
  while symbol < 0x100
    # Model index based on current symbol value
    model_index = base_offset + symbol
    bit = range_decoder.decode_bit(models[model_index])

    if trace_first_bits || trace_96_bits
      range_after = range_decoder.instance_variable_get(:@range)
      code_after = range_decoder.instance_variable_get(:@code)
      if trace_first_bits
        puts "Bit #{symbol}: model_index=#{model_index}, bit=#{bit}, range=0x#{range_after.to_s(16)}, code=0x#{code_after.to_s(16)}"
      else
        puts " symbol=#{symbol}: model_index=#{model_index}, bit=#{bit}, range=0x#{range_after.to_s(16)}, code=0x#{code_after.to_s(16)}"
      end
    end

    # Shift the decoded bit into the symbol
    symbol = (symbol << 1) | bit
  end

  # Symbol is now in range 0x100-0x1FF; strip the marker bit
  result = symbol - 0x100

  if trace_first
    puts "Result: 0x#{result.to_s(16)} ('#{result.chr}')"
    puts "=== decode_unmatched END ==="
    puts ""
  end

  result
end
128
+
129
# Decode literal byte in matched mode (SDK feature)
#
# This mode uses a byte from the dictionary (the "match byte") as extra
# context. For every bit:
# - the match byte is shifted left and masked with the current offset,
#   giving match_bit (either 0 or the offset value itself)
# - the model index is base_offset + offset + match_bit + symbol
# - the decoded bit updates the offset: a 0 clears match_bit from it, a 1
#   keeps only match_bit
# - as soon as the decoded bit differs from the match bit, the remaining
#   bits are read by decode_unmatched_tail
#
# The original author notes that this mirrors rc_matched_literal in
# XZ Utils (liblzma rangecoder/range_decoder.h), where the offset is
# driven by the DECODED bit rather than by the match bit.
#
# Debug tracing (puts) is controlled by TRACE_MATCHED_DECODE (only for
# lit_state 96) and DICT_FULL_233_TRACE (only for lit_state 0). The
# latter reads @range and @code from the range decoder via
# instance_variable_get.
#
# @param match_byte [Integer] Corresponding byte from dictionary
# @param lit_state [Integer] Literal context value (unshifted)
# @param lc [Integer] Literal context bits, used as the shift amount
# @param range_decoder [RangeDecoder] Range decoder instance
# @param models [Array<BitModel>] Literal probability models
# @return [Integer] Decoded byte value (0-255)
def decode_matched(match_byte, lit_state, lc, range_decoder, models)
  base_offset = 3 * (lit_state << lc)
  symbol = 1
  # Start from the bare match byte (no 0x100 marker); the offset mask
  # takes care of selecting the current bit after each shift.
  match_sym = match_byte
  offset = 0x100

  # Resolve the debug switches once instead of on every decoded bit
  trace_96 = ENV["TRACE_MATCHED_DECODE"] && lit_state == 96
  trace_233 = ENV.fetch("DICT_FULL_233_TRACE", nil) && lit_state.zero?

  # Bookkeeping that only the trace output reads
  bit_num = 0
  result_bits = trace_233 ? [] : nil

  if trace_96
    puts "=== MATCHED LITERAL DECODE: lit_state=#{lit_state}, match_byte=0x#{match_byte.to_s(16).upcase} ==="
    puts " base_offset=#{base_offset}"
    puts " Initial: symbol=#{symbol}, offset=0x#{offset.to_s(16).upcase}"
  end

  if trace_233
    puts "=== MATCHED LITERAL TRACE at dict_full=233 ==="
    puts " match_byte=0x#{match_byte.to_s(16).upcase}"
    puts " base_offset=#{base_offset}"
    puts " Initial: symbol=#{symbol}, offset=0x#{offset.to_s(16).upcase}"
  end

  loop do
    if trace_233
      puts "\n Bit #{bit_num}:"
      puts " match_sym=0x#{(match_sym & 0xFF).to_s(16).upcase}, offset=0x#{offset.to_s(16).upcase}"
    end

    # Shift FIRST, then extract: this brings the next match bit to the
    # position covered by the offset mask.
    match_sym <<= 1

    # Not a 0/1 flag: the value is either 0 or the offset itself, and is
    # used as-is in the model index and in the offset update below.
    match_bit = match_sym & offset

    model_index = base_offset + offset + match_bit + symbol

    if trace_233
      puts " match_bit=0x#{match_bit.to_s(16).upcase}, symbol=#{symbol}"
      puts " model_index=#{model_index}"
      puts " offset_from_base=#{model_index - base_offset}"
      prob_before = models[model_index].probability
      puts " probability_before=0x#{prob_before.to_s(16).upcase} (#{prob_before})"
      rd_range_before = range_decoder.instance_variable_get(:@range)
      rd_code_before = range_decoder.instance_variable_get(:@code)
      puts " range_decoder BEFORE: range=0x#{rd_range_before.to_s(16)}, code=0x#{rd_code_before.to_s(16)}"
    end

    # Decode literal bit
    bit = range_decoder.decode_bit(models[model_index])

    if trace_233
      result_bits << bit
      prob_after = models[model_index].probability
      puts " decoded_bit=#{bit}"
      puts " probability_after=0x#{prob_after.to_s(16).upcase} (#{prob_after})"
      rd_range = range_decoder.instance_variable_get(:@range)
      rd_code = range_decoder.instance_variable_get(:@code)
      puts " range_decoder AFTER: range=0x#{rd_range.to_s(16)}, code=0x#{rd_code.to_s(16)}"
    end

    # Update offset and symbol from the DECODED bit
    if bit.zero?
      offset &= ~match_bit
      symbol <<= 1
    else
      offset &= match_bit
      symbol = (symbol << 1) | 1
    end

    if trace_96
      puts " new_offset=0x#{offset.to_s(16).upcase}"
      puts " new_symbol=#{symbol} (0x#{symbol.to_s(16).upcase})"
    end

    # If bits diverge, the match byte no longer provides context
    if (match_bit.positive? ? 1 : 0) != bit
      puts " *** BITS DIVERGE - switching to unmatched mode ***" if trace_96
      if trace_233
        puts " *** BITS DIVERGE at bit #{bit_num} - match_bit=#{match_bit.positive? ? 1 : 0}, decoded_bit=#{bit} ***"
      end
      # Divergence on the final bit: nothing left for the tail to decode
      break if symbol >= 0x100

      result = decode_unmatched_tail(symbol, base_offset, lc, range_decoder,
                                     models)
      if trace_233
        puts "\n FINAL RESULT (after unmatched tail): 0x#{result.to_s(16).upcase} ('#{result.chr}')"
        puts " Result bits: #{result_bits.join}"
        puts "=== END MATCHED LITERAL TRACE ===\n"
      end
      return result
    end

    # Done when symbol reaches 0x100
    break if symbol >= 0x100

    bit_num += 1
  end

  result = symbol - 0x100
  if trace_233 || trace_96
    puts "\n FINAL RESULT: 0x#{result.to_s(16).upcase} ('#{result.chr}')"
    puts " Result bits: #{result_bits.join}" if trace_233
    puts "=== END MATCHED LITERAL DECODE ===\n"
  end
  result
end
293
+
294
+ private
295
+
296
# Finish a literal in unmatched mode, starting from a partial symbol.
#
# Used by decode_matched once the decoded bit has diverged from the
# match byte. Keeps decoding bits with the model at base_offset + symbol
# until the symbol reaches 0x100 or more, then strips the marker bit.
#
# @param symbol [Integer] Partial symbol value
# @param base_offset [Integer] Model base offset
# @param _lc [Integer] Literal context bits (not used here)
# @param range_decoder [RangeDecoder] Range decoder instance
# @param models [Array<BitModel>] Literal probability models
# @return [Integer] Decoded byte value (0-255)
def decode_unmatched_tail(symbol, base_offset, _lc, range_decoder,
                          models)
  partial = symbol
  until partial >= 0x100
    next_bit = range_decoder.decode_bit(models[base_offset + partial])
    partial = (partial << 1) | next_bit
  end
  partial - 0x100
end
317
+ end
318
+ end
319
+ end
320
+ end