omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,434 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "range_coder"
24
+
25
+ module Omnizip
26
+ module Algorithms
27
+ class LZMA < Algorithm
28
+ # Range decoder for LZMA decompression
29
+ #
30
+ # This class implements the decoding side of arithmetic coding
31
+ # using integer range arithmetic. It decodes bits from the
32
+ # compressed byte stream based on their probability models.
33
+ #
34
+ # The decoder mirrors the encoder's range subdivisions to
35
+ # extract the original bit values. It maintains a code value
36
+ # that represents the current position within the range.
37
+ class RangeDecoder < RangeCoder
38
+ attr_reader :code
39
+
40
+ # Initialize the range decoder
41
+ #
42
+ # @param input_stream [IO] The input stream of encoded bytes
43
def initialize(input_stream)
  super(input_stream)

  # Book-keeping for the five-byte range-coder preamble; init_decoder
  # consumes it immediately and flips @initialization_complete.
  @init_bytes_remaining = 5
  @initialization_complete = false
  @code = 0

  init_decoder
end
50
+
51
+ # Update the input stream (for LZMA2 multi-chunk streams)
52
+ #
53
+ # When processing LZMA2 chunks, we need to update the stream
54
+ # reference for each new chunk while preserving the range decoder
55
+ # state (range, code) across chunks.
56
+ #
57
+ # XZ Utils pattern: The range coder uses a buffer pointer that's
58
+ # updated for each chunk, while rc_reset() resets range/code.
59
+ #
60
+ # @param new_stream [IO] New input stream
61
+ # @return [void]
62
def update_stream(new_stream)
  # Only the byte source is replaced here; @range, @code and the pending
  # init-byte counter are deliberately left untouched.
  @stream = new_stream
end
65
+
66
+ # Decode a single bit using a probability model
67
+ #
68
+ # The range is split based on the bit's probability,
69
+ # and the code value determines which portion contains
70
+ # the actual bit value.
71
+ #
72
+ # XZ Utils pattern (rc_if_0): normalize BEFORE bound calculation
73
+ # See: XZ Utils, src/liblzma/rangecoder/range_decoder.h:181-184
74
+ #
75
+ # @param model [BitModel] The probability model for this bit
76
+ # @return [Integer] The decoded bit value (0 or 1)
77
def decode_bit(model)
  # rc_normalize runs first so the bound is derived from the renormalized range.
  normalize
  bound = (@range >> 11) * model.probability

  # Every diagnostic below is opt-in through a TRACE_* environment variable
  # and is written with warn (stderr), so enabling a trace can never mix
  # debug text into data a caller is streaming to stdout.
  trace_model_updates = ENV.fetch("TRACE_MODEL_UPDATES", nil)
  prob_before = model.probability if trace_model_updates
  trace_is_rep = ENV.fetch("TRACE_IS_REP_BITS", nil) && (bound > 1_000_000)

  if trace_is_rep
    warn " [RangeDecoder.decode_bit] BEFORE: range=#{@range}, code=#{@code}, bound=#{bound}, prob=#{model.probability}"
  end

  if ENV["TRACE_MODEL_SELECTION"]
    begin
      # Debug aid: peeks at the first live XzUtilsDecoder instance (if any)
      # and reports its state when dict_full is in the 220..235 window.
      ObjectSpace.each_object(Omnizip::Algorithms::XzUtilsDecoder) do |decoder|
        dict_full = decoder.instance_variable_get(:@dict_full)
        if dict_full && dict_full >= 220 && dict_full <= 235
          pos = decoder.instance_variable_get(:@pos)
          state = decoder.instance_variable_get(:@state)
          warn " [decode_bit] dict_full=#{dict_full}, pos=#{pos}, state=#{state}"
          warn " [decode_bit] model.object_id=#{model.object_id}, prob=#{model.probability}"
          warn " [decode_bit] range=0x#{@range.to_s(16)}, code=0x#{@code.to_s(16)}, bound=0x#{bound.to_s(16)}"
          $stderr.flush
        end
        break
      end
    rescue StandardError => e
      # Context not available (e.g. the decoder class is not loaded)
      warn " [decode_bit] ERROR: #{e.message}"
      $stderr.flush
    end
  end

  if ENV["TRACE_DECODE_BIT_LIT96"]
    warn " decode_bit: range=0x#{@range.to_s(16)}, code=0x#{@code.to_s(16)}, prob=#{model.probability}, bound=0x#{bound.to_s(16)}, code<bound?=#{@code < bound}"
  end

  if ENV.fetch("TRACE_SPECIFIC_DECODE", nil) && @range == 0x40000000 && @code == 0x21407d82
    warn " === CRITICAL DECODE_BIT (MATCHED LITERAL) ==="
    warn " BEFORE: range=0x#{@range.to_s(16)} (#{@range})"
    warn " BEFORE: code=0x#{@code.to_s(16)} (#{@code})"
    warn " probability=#{model.probability}"
    warn " bound=0x#{bound.to_s(16)} (#{bound})"
    warn " range >> 11 = 0x#{(@range >> 11).to_s(16)} (#{@range >> 11})"
    warn " (range >> 11) * probability = 0x#{((@range >> 11) * model.probability).to_s(16)} (#{(@range >> 11) * model.probability})"
    warn " code < bound? #{@code < bound}"
    warn " result should be: #{@code < bound ? 0 : 1}"
    warn " =========================================="
  end

  if ENV["TRACE_DECODE_BIT_257"]
    warn " [decode_bit] range=0x#{@range.to_s(16)}, code=0x#{@code.to_s(16)}, prob=#{model.probability}, bound=0x#{bound.to_s(16)}, code<bound?=#{@code < bound}, result=#{@code < bound ? 0 : 1}"
  end

  # code below the bound selects the "0" sub-range [0, bound); otherwise the
  # "1" sub-range [bound, range) is selected and both values are rebased.
  bit = @code < bound ? 0 : 1
  if bit.zero?
    @range = bound
  else
    @code -= bound
    @range -= bound
  end
  model.update(bit)

  if trace_model_updates && prob_before != model.probability
    warn " [decode_bit] model UPDATE: #{prob_before} -> #{model.probability} (bit=#{bit}, object_id=#{model.object_id})"
  end
  if trace_is_rep
    warn " [RangeDecoder.decode_bit] AFTER (bit=#{bit}): range=#{@range}, code=#{@code}"
  end

  bit
end
164
+
165
+ # Decode bits directly without using probability model
166
+ #
167
+ # This is used for decoding values with uniform distribution
168
+ # where all bit values are equally likely.
169
+ #
170
+ # @param num_bits [Integer] Number of bits to decode
171
+ # @return [Integer] The decoded value
172
def decode_direct_bits(num_bits)
  # Diagnostics are strictly opt-in; nothing is written to stderr unless
  # RANGE_DECODER_TRACE is set.
  tracing = ENV["RANGE_DECODER_TRACE"]
  if tracing
    warn " decode_direct_bits START: num_bits=#{num_bits}"
    warn " BEFORE: range=#{@range.inspect}, code=#{@code.inspect}"
  end

  result = 0
  num_bits.times do |index|
    normalize
    @range >>= 1

    # With the range halved, the code falls either in the lower half (bit 0)
    # or in the upper half (bit 1, code rebased by subtracting the range).
    bit = @code >= @range ? 1 : 0

    if tracing && index < 3 # Only the first 3 iterations
      warn " [#{index + 1}/#{num_bits}] range=#{@range.inspect}, code=#{@code.inspect}, bit=#{bit}, result=#{result}"
    end

    @code -= @range if bit == 1
    result = (result << 1) | bit
  end

  warn " AFTER #{num_bits} iterations: result=#{result}" if tracing

  result
end
221
+
222
+ # Decode bits directly using a base value (XZ Utils rc_direct pattern)
223
+ #
224
+ # This method implements the XZ Utils rc_direct macro which is used
225
+ # for decoding distance values in slots 14+. The pattern matches
226
+ # XZ Utils' implementation in rangecoder/range_decoder.h:366-375.
227
+ #
228
+ # XZ Utils rc_direct behavior (from C macro):
229
+ # - dest = (dest << 1) + 1 (unconditionally)
230
+ # - Normalize range, halve it, subtract from code
231
+ # - bound = 0 - (code >> 31) extracts sign bit
232
+ # - If code >= range (bit=1): sign=0, bound=0, dest stays at (dest << 1) + 1
233
+ # - If code < range (bit=0): sign=1, bound=-1, dest = (dest << 1) + 1 - 1 = dest << 1
234
+ # - dest += bound
235
+ # - code += range & bound (restore code if bit=0)
236
+ #
237
+ # In Ruby (without unsigned wraparound), we explicitly check if code >= range
238
+ # and undo the +1 if the bit is 0.
239
+ #
240
+ # @param num_bits [Integer] Number of bits to decode
241
+ # @param base [Integer] Base value to start from (2 or 3 for distances)
242
+ # @return [Integer] The decoded value
243
def decode_direct_bits_with_base(num_bits, base)
  # Optional slot-40 diagnostics (TRACE_DIRECT_BITS_SLOT40)
  if ENV["TRACE_DIRECT_BITS_SLOT40"]
    puts " [decode_direct_bits_with_base] START: base=#{base}, num_bits=#{num_bits}"
    puts " BEFORE: range=0x#{@range.to_s(16)}, code=0x#{@code.to_s(16)}"
  end

  # Fold the decoded bits onto the base value, most significant bit first.
  decoded = num_bits.times.inject(base) do |acc, step|
    normalize
    @range >>= 1

    # The bit is 1 exactly when the code lies in the upper half of the range.
    bit = @code >= @range ? 1 : 0
    appended = (acc << 1) + bit

    if ENV["TRACE_DIRECT_BITS_SLOT40"] && step < 15
      puts " [#{step + 1}/#{num_bits}] bit=#{bit}, result after this step = #{appended}, range=0x#{@range.to_s(16)}, code=0x#{@code.to_s(16)}"
    end

    # Rebase the code only when the upper half was selected.
    @code -= @range if bit == 1
    appended
  end

  if ENV["TRACE_DIRECT_BITS_SLOT40"]
    puts " [decode_direct_bits_with_base] END: result=#{decoded}"
  end
  decoded
end
276
+
277
+ # Reset the range decoder for a new chunk
278
+ #
279
+ # This matches XZ Utils rc_reset() behavior:
280
+ # - Reset range to UINT32_MAX (0xFFFFFFFF)
281
+ # - Reset code to 0
282
+ # - Set init_bytes_remaining to 5 (lazy initialization)
283
+ # - Let normalize() read the initialization bytes during actual decoding
284
+ #
285
+ # Called during state reset (control >= 0xA0) to reset the range decoder
286
+ # for the new chunk's compressed data.
287
+ #
288
+ # XZ Utils reference: src/liblzma/rangecoder/range_decoder.h:181
289
+ #
290
+ # @return [void]
291
def reset
  # Best-effort stream position for the debug lines; streams without a
  # usable #pos are reported as "N/A".
  current_pos = lambda do
    begin
      @stream.pos
    rescue StandardError
      "N/A"
    end
  end

  if ENV["LZMA_DEBUG"]
    warn " RangeDecoder.reset: BEFORE reset, range=0x#{@range.to_s(16)}, code=0x#{@code.to_s(16)}, stream.pos=#{current_pos.call}, init_bytes_remaining=#{@init_bytes_remaining}"
  end

  # Lazy initialization: no bytes are read here. The counter tells
  # normalize() to pull the five preamble bytes on the next decode.
  @init_bytes_remaining = 5
  @code = 0
  @range = 0xFFFFFFFF

  return unless ENV["LZMA_DEBUG"]

  warn " RangeDecoder.reset: AFTER reset, code=0x#{@code.to_s(16)}, stream.pos=#{current_pos.call}, init_bytes_remaining=#{@init_bytes_remaining}"
end
314
+
315
+ # Normalize the range when it becomes too small
316
+ #
317
+ # When range drops below TOP threshold, shift in a new
318
+ # byte from the input stream and scale up the range by 256.
319
+ #
320
+ # XZ Utils pattern (rc_normalize): uses IF, not WHILE!
321
+ # Each normalize call shifts in at most ONE byte.
322
+ # See: XZ Utils, src/liblzma/rangecoder/range_decoder.h:143-149
323
+ #
324
+ # XZ Utils lazy initialization (range_decoder.h:146-149):
325
+ # If init_bytes_remaining > 0, read byte for code initialization
326
+ # Otherwise, read byte for range normalization
327
+ #
328
+ # @return [void]
329
def normalize
  tracing = ENV["RANGE_DECODER_TRACE"]

  if @init_bytes_remaining.positive?
    # Stream position/size are only needed for the trace output, so they
    # are not queried at all when tracing is off.
    if tracing
      stream_pos_before = begin
        @stream.pos
      rescue StandardError
        "N/A"
      end
      stream_size = begin
        @stream.size
      rescue StandardError
        "N/A"
      end
    end

    # Lazy initialization: consume ALL outstanding preamble bytes in one go.
    # decode_bit calls normalize only once per bit, so reading a single byte
    # per call would leave the code value half-initialized.
    while @init_bytes_remaining.positive?
      # A missing byte (stream already exhausted) is treated as 0 here.
      byte = @stream.getbyte || 0
      code_before = @code
      @code = ((code_before << 8) | byte) & 0xFFFFFFFF
      @init_bytes_remaining -= 1

      next unless tracing

      warn "\n=== RangeDecoder.normalize (init_bytes_remaining=#{@init_bytes_remaining + 1}) ==="
      warn " stream_pos_before=#{stream_pos_before}, stream_size=#{stream_size}"
      warn " byte=0x#{byte.to_s(16).upcase}, code_before=0x#{code_before.to_s(16).upcase}"
      warn " (code_before << 8) = 0x#{(code_before << 8).to_s(16).upcase}"
      warn " ((code_before << 8) | byte) = 0x#{((code_before << 8) | byte).to_s(16).upcase}"
      warn " code_after=0x#{@code.to_s(16).upcase}"
    end
  end

  # Regular renormalization: at most ONE byte is shifted in per call.
  return unless @range < TOP

  byte = read_byte
  @range <<= 8
  @code = ((@code << 8) | byte) & 0xFFFFFFFF

  if tracing
    pos = begin
      @stream.pos
    rescue StandardError
      "N/A"
    end
    warn " NORMALIZE: pos=#{pos}, byte=0x#{byte.to_s(16).upcase}, code=0x#{@code.to_s(16).upcase}, range=0x#{@range.to_s(16).upcase}"
    $stderr.flush
  end

  nil
end
380
+
381
+ private
382
+
383
+ # Initialize the decoder by reading the first bytes
384
+ #
385
+ # XZ Utils rc_read_init (range_decoder.h:160-167):
386
+ # - Read 5 bytes and construct code value
387
+ # - code is uint32_t, so it's automatically masked to 32 bits
388
+ # - In Ruby, we need to explicitly mask to ensure 32-bit value
389
+ #
390
+ # @return [void]
391
def init_decoder
  # Shift the five preamble bytes into the 32-bit code register, keeping
  # the lazy-init counter in step so normalize does not re-read them.
  1.upto(5) do
    incoming = read_byte
    @code = (incoming | (@code << 8)) & 0xFFFFFFFF
    @init_bytes_remaining -= 1 unless @init_bytes_remaining <= 0
  end

  @initialization_complete = true
end
398
+
399
+ # Read a single byte from the input stream
400
+ #
401
+ # @return [Integer] The byte value (0-255)
402
+ # @raise [Omnizip::DecompressionError] If stream is exhausted during normal decoding
403
def read_byte
  byte = @stream.getbyte

  # "Normal decoding" means the constructor preamble has been read and no
  # lazy (post-reset) initialization bytes are outstanding.
  decoding = @initialization_complete && @init_bytes_remaining.zero?

  if byte.nil?
    # Running dry while decoding means the compressed stream ended early.
    if decoding
      raise Omnizip::DecompressionError,
            "LZMA compressed data exhausted prematurely. The file may be corrupted or the uncompressed size field may be incorrect."
    end

    # During initialization a missing byte is read as 0.
    return 0
  end

  # The stream position is only needed for trace output, so it is queried
  # only when a trace flag is set instead of once per decoded byte.
  if decoding && (ENV["RANGE_DECODER_TRACE"] || ENV["LZMA_DEBUG"])
    pos = begin
      @stream.pos
    rescue StandardError
      "N/A"
    end
    if ENV["RANGE_DECODER_TRACE"]
      warn " READ_BYTE: pos=#{pos.inspect}, byte=0x#{byte.to_s(16).upcase}"
      $stderr.flush
    end
    if ENV["LZMA_DEBUG"]
      warn " READ_BYTE: pos=#{pos.inspect}, byte=0x#{byte.to_s(16).upcase}, @code now=0x#{@code.to_s(16)}"
    end
  end

  byte
end
431
+ end
432
+ end
433
+ end
434
+ end
@@ -0,0 +1,194 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ # Ported from XZ Utils src/liblzma/rangecoder/range_encoder.h
24
+ # Direct port of the reference implementation for byte-for-byte compatibility.
25
+
26
+ require_relative "range_coder"
27
+
28
+ module Omnizip
29
+ module Algorithms
30
+ class LZMA < Algorithm
31
+ # Range encoder for LZMA compression
32
+ #
33
+ # This is a direct port of XZ Utils' range encoder implementation
34
+ # for guaranteed byte-for-byte compatibility.
35
+ #
36
+ # The encoder maintains a range [low, low+range) and subdivides
37
+ # it proportionally based on symbol probabilities.
38
+ class RangeEncoder < RangeCoder
39
+ # Initialize the range encoder
40
+ #
41
+ # @param output_stream [IO] The output stream for encoded bytes
42
def initialize(output_stream)
  super(output_stream)

  @pre_flush_pos = 0
  # cache_size starts at 1 (not 0), mirroring XZ Utils, so the very first
  # shift_low emits the initial cache byte.
  @cache_size = 1
  @cache = 0
end
48
+
49
+ # Encode a single bit using a probability model
50
+ #
51
+ # Ported from XZ Utils rc_encode() - RC_BIT_0 and RC_BIT_1 cases.
52
+ # The key is that normalization happens BEFORE encoding the bit.
53
+ #
54
+ # IMPORTANT: We must emulate 32-bit unsigned arithmetic by masking
55
+ # after each operation, since Ruby's integers are arbitrary precision.
56
+ #
57
+ # @param model [BitModel] The probability model for this bit
58
+ # @param bit [Integer] The bit value (0 or 1)
59
+ # @return [void]
60
def encode_bit(model, bit)
  # Renormalize first, then split the range (same order as XZ Utils).
  normalize

  probability = model.probability
  zero_bit = bit.zero?

  # Optional is_rep diagnostics, emitted for zero bits only
  if ENV["TRACE_IS_REP_BITS"] && zero_bit
    puts " [RangeEncoder.encode_bit] BEFORE: range=#{@range}, low=#{@low}, prob=#{probability}, bit=#{bit}"
  end

  # Size of the lower ("0") sub-range. Ruby integers are unbounded, so each
  # register is masked by hand to emulate the C uint32_t / uint64_t fields.
  split = (@range >> 11) * probability
  if zero_bit
    # RC_BIT_0: keep the lower portion
    @range = split & 0xFFFFFFFF
  else
    # RC_BIT_1: skip past the lower portion and keep the remainder
    @low = (@low + split) & 0xFFFFFFFFFFFFFFFF
    @range = (@range - split) & 0xFFFFFFFF
  end

  if ENV["TRACE_IS_REP_BITS"] && zero_bit
    puts " [RangeEncoder.encode_bit] AFTER: range=#{@range}, low=#{@low}"
  end

  # Adapt the probability model with the bit that was just encoded.
  model.update(bit)
end
94
+
95
+ # Encode bits directly without using probability model
96
+ #
97
+ # Used for encoding values with uniform distribution.
98
+ # Emulates 32-bit unsigned arithmetic.
99
+ #
100
+ # @param value [Integer] The value to encode
101
+ # @param num_bits [Integer] Number of bits to encode
102
+ # @return [void]
103
def encode_direct_bits(value, num_bits)
  # Emit the bits most significant first; position counts down to 1.
  num_bits.downto(1) do |position|
    normalize
    @range = (@range >> 1) & 0xFFFFFFFF

    # A zero bit keeps the lower half, so low stays where it is.
    next if value[position - 1].zero?

    # A one bit selects the upper half: advance low by the halved range.
    @low = (@low + @range) & 0xFFFFFFFFFFFFFFFF
  end
end
111
+
112
+ # Flush remaining bytes to output stream
113
+ #
114
+ # Ported from XZ Utils rc_flush().
115
+ #
116
+ # @return [void]
117
def flush
  # Remember where the stream stood before the flush bytes are written;
  # bytes_for_decode reports this position.
  @pre_flush_pos = @stream.pos

  # With the range at its maximum no further normalization is triggered.
  @range = 0xFFFFFFFF

  # Push out the cache plus everything still held in low (5 shifts).
  5.times do
    shift_low
  end
end
128
+
129
+ # Return bytes needed for decoding
130
+ #
131
+ # For LZMA2: returns pre-flush position (excludes 5-byte flush padding)
132
+ # For regular LZMA: returns full output size
133
+ #
134
+ # @return [Integer] Number of bytes decoder will consume
135
def bytes_for_decode
  # NOTE(review): initialize sets @pre_flush_pos to 0, and 0 is truthy in
  # Ruby, so the @stream.pos fallback below is only reached when the
  # variable is nil/false. Before flush this therefore returns 0. Confirm
  # whether that is the intended contract.
  return @pre_flush_pos if @pre_flush_pos

  @stream.pos
end
138
+
139
+ protected
140
+
141
+ # Normalize the range when it becomes too small
142
+ #
143
+ # Ported from XZ Utils rc_encode() normalization logic.
144
+ # IMPORTANT: shift_low is called BEFORE range is shifted!
145
+ #
146
+ # @return [void]
147
def normalize
  # Each round emits the top byte of low first and only then widens the
  # range by 8 bits. The order matters for carry handling.
  until @range >= TOP
    shift_low
    @range <<= 8
  end
end
153
+
154
+ private
155
+
156
+ # Shift the top byte of 'low' to output
157
+ #
158
+ # Direct port of XZ Utils rc_shift_low() from range_encoder.h:136-159
159
+ # Handles carry propagation through the cache mechanism.
160
+ #
161
+ # @return [void]
162
def shift_low
  # Bit 32 and up of low hold a pending carry; the lower 32 bits are the
  # actual window.
  overflow = (@low >> 32) & 0xFF
  window = @low & 0xFFFFFFFF

  # Output is deferred only while the top byte is 0xFF and no carry has
  # arrived, because a later carry could still ripple through those bytes.
  unless window >= 0xFF000000 && overflow.zero?
    # Emit the cached byte, then every deferred 0xFF byte, each with the
    # carry added (a carry turns the 0xFF run into 0x00 bytes).
    loop do
      @stream.putc((@cache + overflow) & 0xFF)
      @cache = 0xFF
      @cache_size -= 1
      break if @cache_size.zero?
    end

    # The top byte of the window becomes the new cached byte.
    @cache = (window >> 24) & 0xFF
  end

  @cache_size += 1

  # Drop the byte that was just cached/deferred and shift in 8 fresh bits.
  @low = (@low & 0x00FFFFFF) << 8
end
191
+ end
192
+ end
193
+ end
194
+ end