omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,415 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "constants"
24
+ require_relative "bit_model"
25
+
26
+ module Omnizip
27
+ module Algorithms
28
+ class LZMA < Algorithm
29
+ # SDK-compatible distance encoder/decoder
30
+ #
31
+ # This class implements the LZMA SDK's distance encoding scheme:
32
+ # - Slot 0-3: Direct encoding (no extra bits)
33
+ # - Slot 4-13: Slot + 1-5 extra bits coded with a reverse bit tree
34
+ # - Slot 14+: Slot + fixed bits + aligned bits
35
+ #
36
+ # The slot categorizes distances into ranges, and extra bits
37
+ # specify the exact position within that range.
38
+ class DistanceCoder
39
+ include Constants
40
+
41
+ # Initialize the distance coder
42
+ #
43
+ # @param num_len_to_pos_states [Integer] Number of length states for slot selection
44
def initialize(num_len_to_pos_states)
  # Builds the probability models used by #encode / #decode and the
  # precomputed distance -> slot lookup table.
  #
  # num_len_to_pos_states: number of independent slot trees to allocate
  # (one is selected per call through the len_state argument).
  fresh_models = ->(count) { Array.new(count) { BitModel.new } }

  @num_len_to_pos_states = num_len_to_pos_states

  # One slot tree per length state. The tree helpers in this class start at
  # index 1 and only read indices below 2**NUM_DIST_SLOT_BITS, so the array
  # is allocated with spare capacity; the size is kept as-is.
  slot_tree_size = 1 << (NUM_DIST_SLOT_BITS + 1)
  @slot_encoders = Array.new(num_len_to_pos_states) do
    fresh_models.call(slot_tree_size)
  end

  # Shared models for the extra bits of the mid-range slots
  # (START_POS_MODEL_INDEX up to, but excluding, END_POS_MODEL_INDEX).
  @pos_encoders = fresh_models.call(NUM_FULL_DISTANCES - END_POS_MODEL_INDEX)

  # Models for the low DIST_ALIGN_BITS bits of the largest slots; the size
  # is likewise larger than the indices the reverse-tree helpers touch.
  @align_encoder = fresh_models.call(1 << (DIST_ALIGN_BITS + 1))

  # Lookup table consulted by #get_dist_slot for small distances.
  @slot_fast = Array.new(DIST_SLOT_FAST_LIMIT)
  init_slot_fast_table
end
69
+
70
+ # Reset all probability models in place
71
+ #
72
+ # This method resets the bit models to their initial state.
73
+ # Called during state reset to reinitialize probability models.
74
+ #
75
+ # @return [void]
76
def reset_models
  # Calls #reset on every model held by this coder: all slot trees first,
  # then the position models, then the align models. Always returns nil.
  #
  # Progress messages are printed only when both DEBUG_RESET_MODELS and
  # LZMA_DEBUG_DISTANCE are present in the environment.
  debug_enabled = -> { ENV["DEBUG_RESET_MODELS"] && ENV["LZMA_DEBUG_DISTANCE"] }

  if debug_enabled.call
    puts " [DistanceCoder.reset_models] Resetting #{@slot_encoders.size} len_states, each with #{@slot_encoders[0]&.size || '?'} models"
  end

  model_groups = @slot_encoders + [@pos_encoders, @align_encoder]
  model_groups.each { |models| models.each(&:reset) }

  puts " [DistanceCoder.reset_models] Done resetting" if debug_enabled.call
  nil
end
89
+
90
+ # Encode a match distance using SDK-compatible encoding
91
+ #
92
+ # @param range_encoder [RangeEncoder] The range encoder
93
+ # @param distance [Integer] Distance value (already subtracted 1)
94
+ # @param len_state [Integer] Length state for slot selection
95
+ # @return [void]
96
def encode(range_encoder, distance, len_state)
  # Writes one match distance: first the slot (through the slot tree chosen
  # by len_state), then whatever extra bits that slot requires.
  slot = get_dist_slot(distance)

  if ENV["LZMA_DEBUG_ENCODE"]
    puts "[DistanceCoder.encode] distance=#{distance} slot=#{slot} len_state=#{len_state}"
    puts "[DistanceCoder.encode] CALLING encode_tree with symbol=#{slot}"
  end

  encode_tree(range_encoder, @slot_encoders[len_state], slot,
              NUM_DIST_SLOT_BITS)

  # Slots below START_POS_MODEL_INDEX carry no extra bits.
  return if slot < START_POS_MODEL_INDEX

  footer_bits = (slot >> 1) - 1
  base = (2 | (slot & 1)) << footer_bits
  # Offset of the distance inside the range covered by this slot.
  offset = distance - base

  if slot >= END_POS_MODEL_INDEX
    # High part goes out as direct bits, the low DIST_ALIGN_BITS bits go
    # through the align models (the reverse tree only reads the low bits
    # of offset, so no masking is needed).
    range_encoder.encode_direct_bits(offset >> DIST_ALIGN_BITS,
                                     footer_bits - DIST_ALIGN_BITS)
    encode_reverse_tree(range_encoder, @align_encoder, 0, offset,
                        DIST_ALIGN_BITS)
  else
    # All footer bits are coded with the shared position models.
    encode_reverse_tree(range_encoder, @pos_encoders, base - slot - 1,
                        offset, footer_bits)
  end
end
135
+
136
+ # Decode a match distance using SDK-compatible decoding
137
+ #
138
+ # @param range_decoder [RangeDecoder] The range decoder
139
+ # @param len_state [Integer] Length state for slot selection
140
+ # @return [Integer] Decoded distance value (before adding 1)
141
def decode(range_decoder, len_state)
  # Reads one match distance: the slot comes from the slot tree selected by
  # len_state, then the slot determines how many extra bits follow.
  #
  # Returns the distance exactly as decoded (the caller adds 1).
  #
  # Side effect: increments the process-wide $distance_decode_count once per
  # call. The counter is kept because #decode_tree in this class reads it to
  # decide whether to trace a call.
  #
  # All tracing is gated on LZMA_DEBUG_DISTANCE, which is read once per call.
  $distance_decode_count ||= 0
  call_index = $distance_decode_count
  debug = ENV["LZMA_DEBUG_DISTANCE"]

  # Hard-coded call-number windows used by the trace output below.
  in_debug_window = debug && (320..330).cover?(call_index)
  in_large_window = debug &&
                    (call_index.between?(25, 35) || call_index.between?(315, 330))

  if debug
    puts " [DistanceCoder.decode ##{call_index}] START - len_state=#{len_state}"
    puts " BEFORE: range=#{range_decoder.range.inspect}, code=#{range_decoder.code.inspect}"
  end

  slot = decode_tree(range_decoder, @slot_encoders[len_state],
                     NUM_DIST_SLOT_BITS)

  if debug
    puts " [DistanceCoder.decode ##{call_index}] len_state=#{len_state}, slot=#{slot}"
    puts " @slot_encoders[#{len_state}] object_id=#{@slot_encoders[len_state].object_id}"
  end

  if slot < START_POS_MODEL_INDEX
    # Smallest slots: the slot number is the distance, no extra bits.
    $distance_decode_count += 1
    puts " -> distance=#{slot}" if in_debug_window
    return slot
  end

  footer_bits = (slot >> 1) - 1

  if slot < END_POS_MODEL_INDEX
    # Mid-range slots: every footer bit is read through the shared position
    # models with a reverse bit tree.
    base = (2 | (slot & 1)) << footer_bits
    result = base + decode_reverse_tree(range_decoder,
                                        @pos_encoders,
                                        base - slot - 1,
                                        footer_bits)
    $distance_decode_count += 1
    puts " -> distance=#{result} (slot #{slot})" if in_debug_window
  else
    # Largest slots: seed with 2 + (slot & 1), let the range decoder extend
    # that seed with the direct bits, then append the low DIST_ALIGN_BITS
    # bits read through the align models. The slot number itself is not
    # added to the result.
    high = range_decoder.decode_direct_bits_with_base(
      footer_bits - DIST_ALIGN_BITS, 2 + (slot & 1)
    )
    low_bits = decode_reverse_tree(range_decoder,
                                   @align_encoder,
                                   0,
                                   DIST_ALIGN_BITS)
    puts " TRACE_326: low_bits=#{low_bits}" if debug && call_index == 326

    result = (high << DIST_ALIGN_BITS) + low_bits
    $distance_decode_count += 1

    if in_debug_window || in_large_window
      puts " -> slot=#{slot}, result_after_direct=#{result >> DIST_ALIGN_BITS}, low_bits=#{low_bits}, distance=#{result}"
    end
    if debug && result > 100000
      # These lines print the counter and decoder state after the decode.
      puts " [LARGE_DISTANCE ##{$distance_decode_count}] distance=#{result}, slot=#{slot}"
      puts " BEFORE: range_decoder.range=#{range_decoder.range}, range_decoder.code=#{range_decoder.code}"
    end
  end

  result
end
242
+
243
+ private
244
+
245
+ # Initialize fast distance slot lookup table
246
+ #
247
+ # @return [void]
248
def init_slot_fast_table
  # Fills @slot_fast so that @slot_fast[d] is the slot of distance d for
  # every d below DIST_SLOT_FAST_LIMIT.
  #
  # Slots 0-3 each cover one distance; from slot 4 on, a slot covers
  # 2**((slot >> 1) - 1) consecutive distances (4-5, 6-7, 8-11, ...).
  # Entries not reached before the slots run out are left untouched.
  cursor = 0

  NUM_DIST_SLOTS.times do |slot|
    break if cursor >= DIST_SLOT_FAST_LIMIT

    span = slot < 4 ? 1 : 1 << ((slot >> 1) - 1)
    # Never write past the end of the fast table.
    span = [span, DIST_SLOT_FAST_LIMIT - cursor].min

    @slot_fast.fill(slot, cursor, span)
    cursor += span
  end

  nil
end
285
+
286
+ # Get the distance slot for a given distance
287
+ #
288
+ # @param distance [Integer] Distance value
289
+ # @return [Integer] Distance slot (0-63)
290
def get_dist_slot(distance)
  # Maps a distance to its slot.
  #
  # Distances below DIST_SLOT_FAST_LIMIT are answered from the precomputed
  # @slot_fast table. Larger distances are computed directly:
  #   slot = 2 * (index of highest set bit) + (the bit just below it)
  return @slot_fast[distance] if distance < DIST_SLOT_FAST_LIMIT

  # Integer#bit_length gives the position of the highest set bit without
  # the manual scan from bit 31, and stays correct for wider values.
  top_bit = distance.bit_length - 1
  (top_bit << 1) + ((distance >> (top_bit - 1)) & 1)
end
307
+
308
+ # Encode a value using a tree of bit models
309
+ #
310
+ # @param range_encoder [RangeEncoder] The range encoder
311
+ # @param models [Array<BitModel>] Array of bit models for the tree
312
+ # @param symbol [Integer] Symbol to encode
313
+ # @param num_bits [Integer] Number of bits in the tree
314
+ # @return [void]
315
def encode_tree(range_encoder, models, symbol, num_bits)
  # Writes the low num_bits bits of symbol, most significant bit first.
  # Each bit is coded with the model at the current tree node; the node
  # index starts at 1 and becomes (node << 1) | bit after every step.
  #
  # Tracing needs both TRACE_ALL_SLOT_ENCODE and LZMA_DEBUG_ENCODE.
  trace_all = ENV["TRACE_ALL_SLOT_ENCODE"]
  tracing = -> { trace_all && ENV["LZMA_DEBUG_ENCODE"] }

  if tracing.call
    puts " [encode_tree START] RECEIVED symbol=#{symbol}, num_bits=#{num_bits}"
    puts " BEFORE: range=#{range_encoder.range}, low=#{range_encoder.low}"
  end

  node = 1
  (num_bits - 1).downto(0) do |shift|
    bit = (symbol >> shift) & 1
    if tracing.call
      # Step number counts up from 1 while shift counts down.
      puts " [#{num_bits - shift}/#{num_bits}] i=#{shift}, bit=#{bit}, m=#{node}, model_idx=#{node}, prob=#{models[node].probability}"
    end
    range_encoder.encode_bit(models[node], bit)
    node = (node << 1) | bit
  end

  if tracing.call
    puts " AFTER: range=#{range_encoder.range}, low=#{range_encoder.low}"
    puts " [encode_tree END] ENCODED symbol=#{symbol}"
  end
end
341
+
342
+ # Decode a value using a tree of bit models
343
+ #
344
+ # @param range_decoder [RangeDecoder] The range decoder
345
+ # @param models [Array<BitModel>] Array of bit models for the tree
346
+ # @param num_bits [Integer] Number of bits in the tree
347
+ # @return [Integer] Decoded symbol
348
def decode_tree(range_decoder, models, num_bits)
  # Reads num_bits bits, most significant first, walking the model tree
  # from node 1 (node becomes (node << 1) | bit after each bit) and
  # returns the assembled symbol.
  #
  # Tracing requires LZMA_DEBUG_DISTANCE plus one of: TRACE_ALL_SLOT_DECODE,
  # TRACE_SLOT_DECODE on a 6-bit tree, or the global decode counter
  # being exactly 28.
  trace_this = (num_bits == 6 && ENV.fetch("TRACE_SLOT_DECODE", nil)) ||
               ($distance_decode_count == 28)
  trace_all = ENV["TRACE_ALL_SLOT_DECODE"]
  tracing = -> { (trace_this || trace_all) && ENV["LZMA_DEBUG_DISTANCE"] }

  if tracing.call
    puts " [decode_tree START] num_bits=#{num_bits}, range=#{range_decoder.range}, code=#{range_decoder.code}"
    puts " models array object_id=#{models.object_id}"
  end

  node = 1
  symbol = 0
  (num_bits - 1).downto(0) do |shift|
    model = models[node]
    bit = range_decoder.decode_bit(model)
    node = (node << 1) | bit
    symbol |= (bit << shift)
    if tracing.call
      # node is printed after it has been advanced past this bit.
      puts " [#{num_bits - shift}/#{num_bits}] i=#{shift}, bit=#{bit}, m=#{node}, model.object_id=#{model.object_id}, prob=#{model.probability}, symbol=#{symbol}"
    end
  end

  puts " [decode_tree END] symbol=#{symbol}" if tracing.call
  symbol
end
376
+
377
+ # Encode a value using reverse bit-tree encoding
378
+ #
379
+ # @param range_encoder [RangeEncoder] The range encoder
380
+ # @param models [Array<BitModel>] Array of bit models
381
+ # @param base_idx [Integer] Base index into models array
382
+ # @param symbol [Integer] Symbol to encode
383
+ # @param num_bits [Integer] Number of bits
384
+ # @return [void]
385
def encode_reverse_tree(range_encoder, models, base_idx, symbol,
                        num_bits)
  # Writes the low num_bits bits of symbol, least significant bit first.
  # The model for each bit is models[base_idx + node], where node starts at
  # 1 and becomes (node << 1) | bit after every step.
  node = 1
  pending = symbol
  num_bits.times do
    bit = pending & 1
    pending >>= 1
    range_encoder.encode_bit(models[base_idx + node], bit)
    node = (node << 1) | bit
  end
end
394
+
395
+ # Decode a value using reverse bit-tree decoding
396
+ #
397
+ # @param range_decoder [RangeDecoder] The range decoder
398
+ # @param models [Array<BitModel>] Array of bit models
399
+ # @param base_idx [Integer] Base index into models array
400
+ # @param num_bits [Integer] Number of bits
401
+ # @return [Integer] Decoded symbol
402
def decode_reverse_tree(range_decoder, models, base_idx, num_bits)
  # Reads num_bits bits, least significant first, and returns the value
  # they form. The model for each bit is models[base_idx + node], where
  # node starts at 1 and becomes (node << 1) | bit after every step.
  node = 1
  (0...num_bits).reduce(0) do |value, position|
    bit = range_decoder.decode_bit(models[base_idx + node])
    node = (node << 1) | bit
    value | (bit << position)
  end
end
412
+ end
413
+ end
414
+ end
415
+ end
@@ -0,0 +1,142 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "constants"
24
+ require_relative "xz_encoder"
25
+ require_relative "../../implementations/seven_zip/lzma/encoder"
26
+
27
+ module Omnizip
28
+ module Algorithms
29
+ class LZMA < Algorithm
30
+ # LZMA Encoder - Factory for LZMA compression implementations
31
+ #
32
+ # This class provides a unified interface for LZMA encoding, delegating
33
+ # to the appropriate implementation based on the target format:
34
+ #
35
+ # 1. SDK-compatible (default): For 7-Zip containers, uses 7-Zip SDK implementation
36
+ # 2. XZ-compatible: For XZ/LZMA files, uses XZ Utils implementation
37
+ #
38
+ # The encoder produces a stream that consists of:
39
+ # - Property byte (lc, lp, pb parameters)
40
+ # - Dictionary size (4 bytes)
41
+ # - Uncompressed size (8 bytes)
42
+ # - Compressed data
43
class Encoder
  include Constants

  attr_reader :dict_size, :lc, :lp, :pb

  # Set up the encoder and pick the backing implementation.
  #
  # The resolved parameters are validated here; the implementation object
  # receives the caller's options hash unchanged.
  #
  # @param output [IO] Output stream for compressed data
  # @param options [Hash] Encoding options
  # @option options [Integer] :dict_size Dictionary size (default 1 << 16)
  # @option options [Integer] :lc Literal context bits (0-8, default 3)
  # @option options [Integer] :lp Literal position bits (0-4, default 0)
  # @option options [Integer] :pb Position bits (0-4, default 2)
  # @option options [Boolean] :write_size Stored as given (default: true)
  # @option options [Boolean] :xz_compatible Use the XZ adapter (default: false)
  # @option options [Boolean] :sdk_compatible Defaults to the opposite of :xz_compatible
  # @option options [Boolean] :raw_mode Stored as given (default: false)
  # @raise [ArgumentError] If lc, lp, pb or dict_size is out of range
  def initialize(output, options = {})
    @output = output

    # Missing (or nil) tuning values fall back to the defaults.
    @dict_size = options[:dict_size] || (1 << 16)
    @lc = options[:lc] || 3
    @lp = options[:lp] || 0
    @pb = options[:pb] || 2

    @write_size = options.fetch(:write_size, true)
    @xz_compatible = options.fetch(:xz_compatible, false)
    @sdk_compatible = options.fetch(:sdk_compatible, !@xz_compatible)
    @raw_mode = options.fetch(:raw_mode, false)

    validate_parameters

    # Only :xz_compatible selects the implementation; the SDK encoder is
    # used in every other case.
    implementation = @xz_compatible ? XzEncoderAdapter : Implementations::SevenZip::LZMA::Encoder
    @impl = implementation.new(output, options)
  end

  # Compress the input by delegating to the selected implementation.
  #
  # @param input [String, IO] Input data to compress
  # @return [Object] Whatever the implementation's encode_stream returns
  def encode_stream(input)
    @impl.encode_stream(input)
  end

  private

  # Check lc, lp, pb and the dictionary size, in that order.
  #
  # @return [void]
  # @raise [ArgumentError] On the first value found out of range
  def validate_parameters
    limits = { "lc" => [@lc, 8], "lp" => [@lp, 4], "pb" => [@pb, 4] }
    limits.each do |name, (value, maximum)|
      raise ArgumentError, "#{name} must be 0-#{maximum}" unless value.between?(0, maximum)
    end

    raise ArgumentError, "Invalid dictionary size" unless @dict_size.between?(DICT_SIZE_MIN, DICT_SIZE_MAX)
  end
end
106
+
107
+ # Adapter for XzEncoder to match SdkEncoder interface
108
+ #
109
+ # XzEncoder has a different interface (encode(input, output) vs encode_stream(input)).
110
+ # This adapter wraps XzEncoder to provide the same interface as SdkEncoder.
111
class XzEncoderAdapter
  # Initialize adapter
  #
  # @param output [IO] Output stream the wrapped encoder writes to
  # @param options [Hash] Encoding options, passed to XzEncoder unchanged
  def initialize(output, options = {})
    @output = output
    @options = options
    @xz_encoder = XzEncoder.new(options)
    @bytes_for_decode = nil
  end

  # Encode stream (matches SdkEncoder interface)
  #
  # Strings are encoded as-is; anything else is drained with #read first.
  # The value returned by XzEncoder#encode is remembered and handed back as
  # the second tuple element.
  #
  # @param input [String, IO] Input data to compress
  # @return [Array<(String, Integer)>] Tuple of [compressed_data, decode_bytes];
  #   compressed_data is nil when the output has no in-memory buffer
  #   (i.e. does not respond to #string, such as a File)
  def encode_stream(input)
    input_data = input.is_a?(String) ? input : input.read
    @bytes_for_decode = @xz_encoder.encode(input_data, @output)
    [buffered_output, @bytes_for_decode]
  end

  # Get bytes for decode (for LZMA2 compatibility)
  #
  # @return [Integer, nil] Value recorded by the last #encode_stream call;
  #   before any call, the size of the buffered output, or nil when the
  #   output has no in-memory buffer
  def bytes_for_decode
    @bytes_for_decode || buffered_output&.bytesize
  end

  private

  # Contents of the output buffer, or nil for outputs without #string.
  # Guarding here avoids a NoMethodError after data was already written
  # to a non-StringIO output.
  def buffered_output
    @output.string if @output.respond_to?(:string)
  end
end
140
+ end
141
+ end
142
+ end