omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,227 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "match_finder"
24
+ require_relative "constants"
25
+
26
+ module Omnizip
27
+ module Algorithms
28
+ class LZMA < Algorithm
29
+ # XZ Utils-compatible match finder adapter
30
+ #
31
+ # Wraps existing MatchFinder to provide XZ Utils interface with:
32
+ # - Cursor-based position tracking
33
+ # - Multiple match finding (not just longest)
34
+ # - Skip and lookahead operations
35
+ #
36
+ # Based on: xz/src/liblzma/lz/lz_encoder_mf.c
37
+ class XzMatchFinderAdapter
38
+ include Constants
39
+
40
# One match candidate: a length paired with the distance back to the
# earlier occurrence (match structure matching XZ Utils).
#
# Members are given as keywords, e.g. Match.new(len: 3, dist: 7).
Match = Struct.new(:len, :dist, keyword_init: true) do
  # @return [String] Readable form such as "Match(len=3, dist=7)"
  def to_s
    format("Match(len=%s, dist=%s)", len, dist)
  end
end
46
+
47
+ attr_reader :matches, :longest_len, :pos
48
+
49
# Create an adapter positioned at the first byte of +data+.
#
# @param data [String, Array<Integer>] Input; a String is expanded into
#   its byte values, any other object is stored as given
# @param dict_size [Integer] Dictionary size (default 1 << 23, i.e. 8 MiB)
# @param nice_len [Integer] Nice match length (default 32)
def initialize(data, dict_size: 1 << 23, nice_len: 32)
  @data = case data
          when String then data.bytes
          else data
          end
  @dict_size = dict_size
  @nice_len = nice_len

  # Cursor and the results of the most recent search
  @pos = 0
  @longest_len = 0
  @matches = []

  # Hash value => list of positions recorded for that hash
  @hash_table = {}
end
67
+
68
# Collect match candidates for the bytes starting at the cursor.
#
# The hash chain for the current position is walked from the most recently
# recorded entry backwards. For every distinct match length found, the
# smallest distance is remembered; the candidates are then stored in
# @matches in ascending length order and the best length in @longest_len.
# The cursor itself is not advanced here.
#
# @return [Integer] Longest match length (0 if no matches)
def find_matches
  @matches.clear
  @longest_len = 0

  # No search is attempted when the cursor is past the end, when fewer than
  # MATCH_LEN_MIN bytes remain, or while fewer than two bytes precede the
  # cursor. The last rule follows the original author's note that the
  # decoder validates dict_full > distance, so matches are withheld this
  # early in the stream. These exits also bypass the hash-table update
  # at the bottom of the method.
  return 0 if @pos >= @data.size || available < MATCH_LEN_MIN || @pos < 2

  bucket = compute_hash
  nearest = {} # match length => smallest distance seen for that length

  (@hash_table[bucket] || []).reverse_each do |candidate|
    gap = @pos - candidate
    # Entries are visited newest first, so everything further is too far
    break if gap > @dict_size

    # The cursor itself can be on the chain if this position was already
    # searched once (lookahead); a zero distance is not a match
    next if gap.zero?

    run = calculate_match_length(candidate)
    next if run < MATCH_LEN_MIN

    known = nearest[run]
    nearest[run] = gap if known.nil? || gap < known
    @longest_len = [@longest_len, run].max

    # A match of nice length is good enough; stop searching
    break if run >= @nice_len
  end

  # Publish the candidates ordered by length
  nearest.sort.each { |run, gap| @matches << Match.new(len: run, dist: gap) }

  # Record the cursor position for later searches
  update_hash(bucket, @pos)

  @longest_len
end
130
+
131
# Advance over up to +n+ bytes without searching for matches.
#
# Each skipped position is still recorded in the hash table so later
# searches can refer back to it. Stops early, without error, once the
# cursor reaches the end of the data. Intended for cases such as rep
# matches, where the caller already knows what it is going to encode.
#
# @param n [Integer] Number of bytes to skip
def skip(n)
  n.downto(1) do
    return if @pos >= @data.size

    update_hash(compute_hash, @pos)
    @pos += 1
  end
end
146
+
147
# Step the cursor forward by exactly one byte.
#
# Only @pos changes: no match search is run, the hash table is not
# touched, and the end of the data is not checked.
#
# @return [Integer] The new position
def move_pos
  @pos = @pos.succ
end
151
+
152
# Count of input bytes from the cursor (inclusive) to the end of the data.
#
# @return [Integer] Number of bytes remaining
def available
  @data.length - @pos
end
158
+
159
# Read the byte under the cursor.
#
# @return [Integer, nil] Byte value, or nil once the cursor has reached
#   or passed the end of the data
def current_byte
  @pos < @data.size ? @data[@pos] : nil
end
167
+
168
# Read a byte relative to the cursor.
#
# @param offset [Integer] Offset from current position (can be negative)
# @return [Integer] Byte value, or 0 when the resulting index lies before
#   the start or at/after the end of the data
def get_byte(offset)
  index = @pos + offset
  inside = index >= 0 && index < @data.size

  inside ? @data[index] : 0
end
178
+
179
# Return the finder to its initial state: cursor at 0, no stored
# matches, longest length 0 and an empty hash table. The input data,
# dictionary size and nice length are left as they are.
def reset
  @pos = @longest_len = 0
  @matches.clear
  @hash_table.clear
end
186
+
187
+ private
188
+
189
# Hash the three values starting at the cursor.
#
# The three values are combined as (first << 16) ^ (second << 8) ^ third.
# When fewer than three values remain, 0 is returned instead.
#
# @return [Integer] Hash value
def compute_hash
  return 0 unless @pos + 2 < @data.size

  first, second, third = @data.values_at(@pos, @pos + 1, @pos + 2)
  (first << 16) ^ (second << 8) ^ third
end
197
+
198
# Count how many consecutive values agree between the cursor and an
# earlier position.
#
# The comparison never looks further than the remaining input or
# @nice_len values, whichever is smaller.
#
# @param prev_pos [Integer] Previous position to compare against
# @return [Integer] Length of match
def calculate_match_length(prev_pos)
  limit = [available, @nice_len].min
  first_mismatch = (0...limit).find do |offset|
    @data[@pos + offset] != @data[prev_pos + offset]
  end

  # With no mismatch inside the window, the whole window matched
  first_mismatch || [limit, 0].max
end
212
+
213
# Append a position to the chain stored under a hash value.
#
# A chain is created on first use. Once a chain holds more than 1024
# positions its oldest entry is dropped, which keeps chains from growing
# without bound.
#
# @param hash_val [Integer] Hash value
# @param pos [Integer] Position to add
def update_hash(hash_val, pos)
  chain = (@hash_table[hash_val] ||= [])
  chain << pos
  chain.shift if chain.size > 1024
end
224
+ end
225
+ end
226
+ end
227
+ end
@@ -0,0 +1,169 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+
5
+ module Omnizip
6
+ module Algorithms
7
+ class LZMA < Algorithm
8
+ # XZ Utils-compatible price calculator
9
+ #
10
+ # Calculates the cost (in price units) of encoding symbols using
11
+ # probability models. Prices are based on logarithmic probabilities:
12
+ # price = -log2(probability) * scale_factor
13
+ #
14
+ # Uses precomputed tables for efficiency, matching XZ Utils exactly.
15
+ #
16
+ # Based on: xz/src/liblzma/rangecoder/price.h
17
+ class XzPriceCalculator
18
+ include Constants
19
+
20
+ # Price scale factor (matches XZ Utils)
21
+ PRICE_SHIFT_BITS = 4
22
+ PRICE_SCALE = 1 << PRICE_SHIFT_BITS
23
+
24
+ # BIT_MODEL_TOTAL = 2^11 = 2048 (from Constants, but define locally for clarity)
25
+ BIT_MODEL_TOTAL_LOCAL = 0x800
26
+ BIT_MODEL_TOTAL_BITS = 11
27
+
28
+ # Number of entries in price table
29
+ PRICE_TABLE_SIZE = BIT_MODEL_TOTAL_LOCAL >> PRICE_SHIFT_BITS
30
+
31
+ class << self
32
+ # Calculate price for encoding a single bit
33
+ #
34
+ # @param prob [Integer] Probability model value (0..BIT_MODEL_TOTAL)
35
+ # @param bit [Integer] Bit value (0 or 1)
36
+ # @return [Integer] Price in price units
37
+ def bit_price(prob, bit)
38
+ if bit.zero?
39
+ # Price for encoding 0
40
+ PRICE_TABLE[prob >> PRICE_SHIFT_BITS]
41
+ else
42
+ # Price for encoding 1
43
+ PRICE_TABLE[(BIT_MODEL_TOTAL_LOCAL - prob) >> PRICE_SHIFT_BITS]
44
+ end
45
+ end
46
+
47
+ # Calculate price for encoding a symbol using bit tree
48
+ #
49
+ # A bit tree encodes a symbol by encoding its bits from MSB to LSB,
50
+ # using probability models indexed by the partial symbol value.
51
+ #
52
+ # @param probs [Array<BitModel>] Probability models for tree
53
+ # @param num_bits [Integer] Number of bits in symbol
54
+ # @param symbol [Integer] Symbol value to encode
55
+ # @return [Integer] Total price in price units
56
+ def bittree_price(probs, num_bits, symbol)
57
+ price = 0
58
+ symbol |= (1 << num_bits) # Add sentinel bit
59
+
60
+ # Encode bits from MSB to LSB
61
+ (num_bits - 1).downto(0) do |i|
62
+ bit = (symbol >> i) & 1
63
+ model_idx = symbol >> (i + 1)
64
+ price += bit_price(probs[model_idx].probability, bit)
65
+ end
66
+
67
+ price
68
+ end
69
+
70
+ # Calculate price for encoding a symbol using reverse bit tree
71
+ #
72
+ # A reverse bit tree encodes a symbol by encoding its bits from
73
+ # LSB to MSB, used for distance encoding.
74
+ #
75
+ # @param probs [Array<BitModel>] Probability models for tree
76
+ # @param num_bits [Integer] Number of bits in symbol
77
+ # @param symbol [Integer] Symbol value to encode
78
+ # @return [Integer] Total price in price units
79
+ def bittree_reverse_price(probs, num_bits, symbol)
80
+ price = 0
81
+ model_idx = 1
82
+
83
+ # Encode bits from LSB to MSB
84
+ num_bits.times do |i|
85
+ bit = (symbol >> i) & 1
86
+ price += bit_price(probs[model_idx].probability, bit)
87
+ model_idx = (model_idx << 1) | bit
88
+ end
89
+
90
+ price
91
+ end
92
+
93
+ # Calculate price for direct bits (uniform distribution)
94
+ #
95
+ # Direct bits have no probability model, each bit costs the same.
96
+ #
97
+ # @param num_bits [Integer] Number of direct bits
98
+ # @return [Integer] Total price in price units
99
+ def direct_price(num_bits)
100
+ # Each direct bit costs 64 units (price of 0.5 probability)
101
+ num_bits << (PRICE_SHIFT_BITS + 2)
102
+ end
103
+
104
+ # Precompute logarithmic price table using Math.log2
105
+ #
106
+ # Generates a table mapping probabilities to prices using the formula:
107
+ # price[i] = -log2(i / BIT_MODEL_TOTAL) * PRICE_SCALE
108
+ #
109
+ # @return [Array<Integer>] Precomputed price table
110
+ def precompute_price_table
111
+ table = Array.new(PRICE_TABLE_SIZE)
112
+
113
+ PRICE_TABLE_SIZE.times do |i|
114
+ if i.zero?
115
+ # Handle zero probability case (maximum price)
116
+ table[i] = 15 << PRICE_SHIFT_BITS
117
+ else
118
+ # Reconstruct probability from table index
119
+ prob = (i << PRICE_SHIFT_BITS) + (PRICE_SCALE >> 1)
120
+ probability = prob.to_f / BIT_MODEL_TOTAL_LOCAL
121
+
122
+ # price = -log2(probability) * PRICE_SCALE
123
+ price = (-Math.log2(probability) * PRICE_SCALE).round
124
+ table[i] = price
125
+ end
126
+ end
127
+
128
+ table
129
+ end
130
+ end
131
+
132
+ # Precomputed logarithmic price table
133
+ # Each entry represents -log2(i/BIT_MODEL_TOTAL) * PRICE_SCALE
134
+ PRICE_TABLE = precompute_price_table.freeze
135
+
136
+ # Instance methods for convenience
137
+
138
+ # @param prob [Integer] Probability value
139
+ # @param bit [Integer] Bit value
140
+ # @return [Integer] Price
141
+ def bit_price(prob, bit)
142
+ self.class.bit_price(prob, bit)
143
+ end
144
+
145
+ # @param probs [Array<BitModel>] Probability models
146
+ # @param num_bits [Integer] Number of bits
147
+ # @param symbol [Integer] Symbol value
148
+ # @return [Integer] Price
149
+ def bittree_price(probs, num_bits, symbol)
150
+ self.class.bittree_price(probs, num_bits, symbol)
151
+ end
152
+
153
+ # @param probs [Array<BitModel>] Probability models
154
+ # @param num_bits [Integer] Number of bits
155
+ # @param symbol [Integer] Symbol value
156
+ # @return [Integer] Price
157
+ def bittree_reverse_price(probs, num_bits, symbol)
158
+ self.class.bittree_reverse_price(probs, num_bits, symbol)
159
+ end
160
+
161
+ # @param num_bits [Integer] Number of direct bits
162
+ # @return [Integer] Price
163
+ def direct_price(num_bits)
164
+ self.class.direct_price(num_bits)
165
+ end
166
+ end
167
+ end
168
+ end
169
+ end
@@ -0,0 +1,261 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "xz_buffered_range_encoder"
4
+ require_relative "constants"
5
+
6
+ module Omnizip
7
+ module Algorithms
8
+ class LZMA < Algorithm
9
# XZ Utils-compatible probability models
#
# Holds every adaptive probability model the LZMA encoder consults,
# grouped the way XZ Utils groups them. Each model is an
# XzBufferedRangeEncoder::Probability, a mutable object that is updated in
# place. #reset puts every model back to BIT_MODEL_TOTAL >> 1 (probability
# 0.5); freshly built models rely on Probability.new's own default
# (presumably the same midpoint -- confirm against that class).
#
# Based on: xz/src/liblzma/lzma/lzma_encoder_private.h
class XzProbabilityModels
  include Constants

  # Literal coder models, one flat array
  attr_reader :literal

  # Models that choose between literal, match and repeated-match variants
  attr_reader :is_match, :is_rep, :is_rep0, :is_rep1, :is_rep2
  attr_reader :is_rep0_long

  # Models used while coding match distances
  attr_reader :dist_slot, :dist_special, :dist_align

  # Length encoders for ordinary matches and for repeated matches
  attr_reader :match_len_encoder, :rep_len_encoder

  # Build all model groups.
  #
  # @param lc [Integer] Number of literal context bits (0-8)
  # @param lp [Integer] Number of literal position bits (0-4)
  # @param pb [Integer] Number of position bits (0-4)
  def initialize(lc, lp, pb)
    @lc, @lp, @pb = lc, lp, pb

    init_literal_models(lc, lp)
    init_match_models(pb)
    init_distance_models
    init_length_encoders(pb)
  end

  # Put every model, and both length encoders, back into the initial state.
  def reset
    reset_literal_models
    reset_match_models
    reset_distance_models
    reset_length_encoders
  end

  private

  # Fresh one-dimensional array of +count+ probability models.
  def new_probs(count)
    Array.new(count) { XzBufferedRangeEncoder::Probability.new }
  end

  # Fresh +rows+ x +columns+ table of probability models.
  def new_prob_table(rows, columns)
    Array.new(rows) { new_probs(columns) }
  end

  # Set every model in a one-dimensional collection to the midpoint.
  def assign_midpoint(models)
    midpoint = BIT_MODEL_TOTAL >> 1
    models.each { |prob| prob.value = midpoint }
  end

  # Literal models, ported from XZ Utils literal_init() in lzma_common.h.
  # The array is deliberately flat (not 2D) to mirror XZ Utils and holds
  # 0x300 << (lc + lp) entries.
  def init_literal_models(lc, lp)
    @literal = new_probs(0x300 << (lc + lp))
  end

  # Match-type models. is_match and is_rep0_long are indexed
  # [state][pos_state]; the remaining groups are indexed [state].
  def init_match_models(pb)
    pos_states = 1 << pb

    @is_match = new_prob_table(NUM_STATES, pos_states)
    @is_rep = new_probs(NUM_STATES)
    @is_rep0 = new_probs(NUM_STATES)
    @is_rep1 = new_probs(NUM_STATES)
    @is_rep2 = new_probs(NUM_STATES)
    @is_rep0_long = new_prob_table(NUM_STATES, pos_states)
  end

  # Distance models:
  # - dist_slot is indexed [len_to_pos_state][dist_slot]
  # - dist_special holds NUM_FULL_DISTANCES - START_POS_MODEL_INDEX models
  # - dist_align holds DIST_ALIGN_SIZE alignment models
  def init_distance_models
    @dist_slot = new_prob_table(NUM_LEN_TO_POS_STATES, NUM_DIST_SLOTS)
    @dist_special = new_probs(NUM_FULL_DISTANCES - START_POS_MODEL_INDEX)
    @dist_align = new_probs(DIST_ALIGN_SIZE)
  end

  # One LengthEncoder for matches and a separate one for repeated matches.
  def init_length_encoders(pb)
    @match_len_encoder = LengthEncoder.new(pb)
    @rep_len_encoder = LengthEncoder.new(pb)
  end

  # Reset helpers, one per model group

  def reset_literal_models
    assign_midpoint(@literal)
  end

  def reset_match_models
    @is_match.each { |row| assign_midpoint(row) }
    assign_midpoint(@is_rep)
    assign_midpoint(@is_rep0)
    assign_midpoint(@is_rep1)
    assign_midpoint(@is_rep2)
    @is_rep0_long.each { |row| assign_midpoint(row) }
  end

  def reset_distance_models
    @dist_slot.each { |row| assign_midpoint(row) }
    assign_midpoint(@dist_special)
    assign_midpoint(@dist_align)
  end

  def reset_length_encoders
    @match_len_encoder.reset
    @rep_len_encoder.reset
  end
end
157
+
158
# Length encoder with probability models and price tables
#
# Match lengths are coded in three tiers, selected by two choice bits:
# - Low:  choice=0, models sized LEN_LOW_SYMBOLS per position state
# - Mid:  choice=1, choice2=0, models sized LEN_MID_SYMBOLS per position state
# - High: choice=1, choice2=1, LEN_HIGH_SYMBOLS models shared by all
#   position states
#
# Alongside the models it keeps, per position state, a table of cached
# prices (one slot for each length from MATCH_LEN_MIN to MATCH_LEN_MAX)
# and a counter callers can use to decide when that table needs refreshing.
class LengthEncoder
  include Constants

  attr_reader :choice, :choice2, :low, :mid, :high, :prices, :counters

  # Build the models and zeroed price tables and counters.
  #
  # @param pb [Integer] Number of position bits
  def initialize(pb)
    @pb = pb
    @num_pos_states = 1 << pb

    # Tier selection bits
    @choice = XzBufferedRangeEncoder::Probability.new
    @choice2 = XzBufferedRangeEncoder::Probability.new

    # Low and mid tiers have their own models for every position state
    @low = Array.new(@num_pos_states) do
      Array.new(LEN_LOW_SYMBOLS) { XzBufferedRangeEncoder::Probability.new }
    end
    @mid = Array.new(@num_pos_states) do
      Array.new(LEN_MID_SYMBOLS) { XzBufferedRangeEncoder::Probability.new }
    end

    # The high tier is shared across position states
    @high = Array.new(LEN_HIGH_SYMBOLS) { XzBufferedRangeEncoder::Probability.new }

    # prices[pos_state][length - MATCH_LEN_MIN], updated incrementally
    price_slots = MATCH_LEN_MAX - MATCH_LEN_MIN + 1
    @prices = Array.new(@num_pos_states) { Array.new(price_slots, 0) }

    # One price-refresh counter per position state
    @counters = Array.new(@num_pos_states, 0)
  end

  # Put all models back to BIT_MODEL_TOTAL >> 1 and zero the price tables
  # and counters.
  def reset
    to_midpoint = ->(prob) { prob.value = BIT_MODEL_TOTAL >> 1 }

    to_midpoint.call(@choice)
    to_midpoint.call(@choice2)
    (@low + @mid).each { |row| row.each(&to_midpoint) }
    @high.each(&to_midpoint)

    @prices.each { |row| row.fill(0) }
    @counters.fill(0)
  end

  # Look up the cached price of a length.
  #
  # @param pos_state [Integer] Position state (0 to 2^pb - 1)
  # @param length [Integer] Match length (MATCH_LEN_MIN to MATCH_LEN_MAX)
  # @return [Integer] Price in price units
  def get_price(pos_state, length)
    row = @prices[pos_state]
    row[length - MATCH_LEN_MIN]
  end

  # Store the cached price of a length.
  #
  # @param pos_state [Integer] Position state
  # @param length [Integer] Match length
  # @param price [Integer] Price value
  def set_price(pos_state, length, price)
    row = @prices[pos_state]
    row[length - MATCH_LEN_MIN] = price
  end

  # Count a position state's counter down by one.
  #
  # @param pos_state [Integer] Position state
  # @return [Boolean] True if the counter is now zero or below
  def decrement_counter(pos_state)
    remaining = @counters[pos_state] - 1
    @counters[pos_state] = remaining
    !remaining.positive?
  end

  # Overwrite a position state's counter.
  #
  # @param pos_state [Integer] Position state
  # @param value [Integer] Counter value
  def reset_counter(pos_state, value)
    @counters[pos_state] = value
  end
end
259
+ end
260
+ end
261
+ end