omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,645 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "xz_match_finder_adapter"
24
+ require_relative "xz_state"
25
+ require_relative "xz_probability_models"
26
+ require_relative "xz_buffered_range_encoder"
27
+ require_relative "constants"
28
+
29
+ module Omnizip
30
+ module Algorithms
31
+ class LZMA < Algorithm
32
+ # XZ Utils-compatible fast mode encoder
33
+ #
34
+ # Implements greedy heuristics from lzma_encoder_optimum_fast.c.
35
+ # Uses 1-position lookahead to decide between literals and matches.
36
+ # No price calculation - relies on simple heuristics for speed.
37
+ #
38
+ # Based on: xz/src/liblzma/lzma/lzma_encoder_optimum_fast.c
39
+ class XzEncoderFast
40
+ include Constants
41
+
42
+ # Number of rep distances (REPS constant)
43
+ REPS = 4
44
+
45
+ # Literal marker (matches XZ Utils UINT32_MAX)
46
+ LITERAL_MARKER = 0xFFFFFFFF
47
+
48
+ attr_reader :reps
49
+
50
# Report how many encoded bytes a decoder has to consume.
#
# This is a straight pass-through to the range encoder handed to the
# constructor; no computation happens here.
#
# NOTE(review): according to the original notes, the encoder answers with the
# pre-flush position for LZMA2 (leaving out the 5-byte flush padding) and with
# the full output size for plain LZMA - confirm against the range encoder.
#
# @return [Integer] Number of bytes decoder will consume
def bytes_for_decode
  range_encoder = @encoder
  range_encoder.bytes_for_decode
end
59
+
60
# Build a fast-mode encoder around its collaborators.
#
# @param mf [XzMatchFinderAdapter] Match finder
# @param encoder [XzBufferedRangeEncoder] Range encoder
# @param models [XzProbabilityModels] Probability models
# @param state [XzState] LZMA state machine
# @param nice_len [Integer] Nice match length (default 32)
# @param lc [Integer] Literal context bits (default 3)
# @param lp [Integer] Literal position bits (default 0)
# @param pb [Integer] Position bits (default 2)
def initialize(mf, encoder, models, state, nice_len: 32, lc: 3, lp: 0,
               pb: 2)
  # Collaborators
  @mf, @encoder, @models, @state = mf, encoder, models, state

  # Tuning parameters
  @nice_len = nice_len
  @lc, @lp, @pb = lc, lp, pb

  # The four most recent match distances; all zero until a match is recorded.
  @reps = Array.new(4, 0)

  # One-position lookahead cache filled by find_best_match.
  @read_ahead = 0
  @longest_match_length = 0
  @matches_count = 0
  @cached_matches = []

  # Byte preceding the current position, used as literal context.
  @prev_byte = 0
end
94
+
95
# Pick the next symbol to encode at the current position (fast-mode heuristics).
#
# The returned pair [back, len] means:
# - back == LITERAL_MARKER, len == 1: encode a literal
# - back in 0..3, len >= 2: rep match using reps[back]
# - back >= REPS, len >= 2: normal match; back is the match finder's
#   distance minus one, plus REPS
#
# The method does not skip over the chosen match (per the original notes the
# main loop does that). When it looks one position ahead, the result is kept
# in @longest_match_length / @matches_count and @read_ahead is set to 1 so
# the next call reuses it instead of searching again.
#
# @return [Array<Integer, Integer>] [back, len]
def find_best_match
  # Reuse the lookahead result from the previous call when there is one.
  if @read_ahead.zero?
    len_main = @mf.find_matches
    matches_count = @mf.matches.size
  else
    len_main = @longest_match_length
    matches_count = @matches_count
    @read_ahead = 0
  end

  buf_avail = [@mf.available + 1, MATCH_LEN_MAX].min

  # Too little input left for any match.
  return [LITERAL_MARKER, 1] if buf_avail < 2

  rep_len, rep_index = check_rep_matches(buf_avail)

  # A rep match that is already "nice" wins outright.
  return [rep_index, rep_len] if rep_len >= @nice_len

  # So does a "nice" normal match (distance minus one, shifted past the reps).
  return [@mf.matches.last.dist - 1 + REPS, len_main] if len_main >= @nice_len

  back_main = 0
  if len_main >= 2
    back_main = @mf.matches.last.dist

    # Trade one byte of length for a much closer distance while that pays off.
    while matches_count > 1 &&
          len_main == @mf.matches[matches_count - 2].len + 1
      prev_dist = @mf.matches[matches_count - 2].dist
      break unless change_pair?(prev_dist, back_main)

      matches_count -= 1
      len_main = @mf.matches[matches_count - 1].len
      back_main = @mf.matches[matches_count - 1].dist
    end

    # A 2-byte match at a far distance is not worth encoding.
    len_main = 1 if len_main == 2 && back_main >= 0x80
  end

  # Prefer the rep match when it is nearly as long as the normal match; the
  # farther the normal match, the more length the rep is allowed to give up.
  prefer_rep = rep_len >= 2 && (
    (rep_len + 1 >= len_main) ||
    (rep_len + 2 >= len_main && back_main > (1 << 9)) ||
    (rep_len + 3 >= len_main && back_main > (1 << 15))
  )
  return [rep_index, rep_len] if prefer_rep

  # No usable match.
  return [LITERAL_MARKER, 1] if len_main < 2 || buf_avail <= 2

  # Lookahead: search at the next position and cache what was found.
  @longest_match_length = @mf.find_matches
  @matches_count = @mf.matches.size
  @read_ahead = 1

  if @longest_match_length >= 2
    new_dist = @mf.matches.last.dist

    # Emit a literal now if the next position offers a better match.
    if (@longest_match_length >= len_main && new_dist < back_main) ||
       (@longest_match_length == len_main + 1 &&
         !change_pair?(back_main, new_dist)) ||
       (@longest_match_length > len_main + 1) ||
       (len_main.between?(3, @longest_match_length + 1) &&
         change_pair?(new_dist, back_main))
      return [LITERAL_MARKER, 1]
    end
  end

  # Emit a literal if a rep distance matches at the next position.
  # Zero entries are unused slots: comparing at distance 0 compares the bytes
  # with themselves and would always succeed, so they are skipped here just as
  # check_rep_matches skips them.
  limit = [2, len_main - 1].max
  rep_hits_next = @reps.any? do |rep_dist|
    !rep_dist.zero? && memcmp_at_offset(1, rep_dist, limit)
  end
  return [LITERAL_MARKER, 1] if rep_hits_next

  # Encode the normal match: distance minus one, shifted past the rep indices.
  [back_main - 1 + REPS, len_main]
end
207
+
208
# Record the distance of a freshly encoded normal match.
#
# The new distance becomes reps[0]; the previous first three entries shift
# down one slot and the oldest entry is dropped. A new array is assigned.
#
# @param distance [Integer] Match distance to remember
def update_reps_match(distance)
  @reps = [distance] + @reps[0, 3]
end
214
+
215
# Move the rep distance that was just used to the front of the list.
#
# The array is modified in place; the relative order of the other entries
# is kept.
#
# @param rep_index [Integer] Rep index (0-3)
def update_reps_rep(rep_index)
  @reps.unshift(@reps.delete_at(rep_index))
end
223
+
224
# Queue one literal byte on the range encoder.
#
# Emits the is_match bit as 0, then the byte itself through either the plain
# 8-bit literal coder or, when the state machine is not in a literal state,
# the matched-literal coder keyed on the byte found reps[0] positions back.
# Afterwards the state machine is advanced and the byte becomes @prev_byte.
#
# @param symbol [Integer] Byte value to encode
def encode_literal(symbol)
  pos_mask = (1 << @pb) - 1
  pos_state = @mf.pos & pos_mask

  # is_match = 0 announces a literal
  @encoder.queue_bit(@models.is_match[@state.value][pos_state], 0)

  # Start of this literal's probability group inside the flat literal array
  subcoder_base = get_literal_base(@mf.pos, @prev_byte)

  if @state.literal_state?
    encode_normal_literal(subcoder_base, symbol)
  else
    # Previous symbol was a match: code against the byte at rep0
    encode_matched_literal(subcoder_base, @mf.get_byte(-@reps[0]), symbol)
  end

  @state.update_literal
  @prev_byte = symbol
end
251
+
252
# Queue a repeated-distance match on the range encoder.
#
# Bit layout: is_match = 1, is_rep = 1, then a chain of selector bits over
# is_rep0 / is_rep1 / is_rep2 where a 0 bit means "this is the rep index" and
# a 1 bit means "keep going" (three 1 bits select rep3). For rep0 an extra
# is_rep0_long bit separates the 1-byte short rep (0) from a long rep (1).
# Every case except the short rep then encodes the length and moves the state
# machine with update_long_rep; the short rep uses update_short_rep.
#
# @param rep_index [Integer] Rep index (0-3)
# @param length [Integer] Match length (>= 2, or 1 for a short rep0)
def encode_rep_match(rep_index, length)
  pos_state = @mf.pos & ((1 << @pb) - 1)

  @encoder.queue_bit(@models.is_match[@state.value][pos_state], 1)
  @encoder.queue_bit(@models.is_rep[@state.value], 1)

  # Walk the selector chain, stopping at the model that names our index.
  %i[is_rep0 is_rep1 is_rep2].each_with_index do |model_name, depth|
    chosen = rep_index == depth
    @encoder.queue_bit(@models.public_send(model_name)[@state.value],
                       chosen ? 0 : 1)
    break if chosen
  end

  short_rep = false
  if rep_index == 0
    long_prob = @models.is_rep0_long[@state.value][pos_state]
    short_rep = length == 1
    @encoder.queue_bit(long_prob, short_rep ? 0 : 1)
  end

  if short_rep
    @state.update_short_rep
  else
    encode_rep_length(length, pos_state)
    @state.update_long_rep
  end

  # Remember the last byte covered by the match as literal context.
  @prev_byte = @mf.get_byte(length - 1)
end
314
+
315
# Queue a normal (non-rep) match on the range encoder.
#
# Emits is_match = 1 and is_rep = 0, then the length followed by the
# distance, advances the state machine and records the last matched byte as
# the literal context for whatever comes next.
#
# @param distance [Integer] Match distance as passed on to encode_distance
# @param length [Integer] Match length (>= 2)
def encode_normal_match(distance, length)
  pos_state = @mf.pos & ((1 << @pb) - 1)
  state_index = @state.value

  header_bits = [
    [@models.is_match[state_index][pos_state], 1], # this is a match ...
    [@models.is_rep[state_index], 0] # ... but not a repeated distance
  ]
  header_bits.each { |prob, bit| @encoder.queue_bit(prob, bit) }

  encode_match_length(length, pos_state)
  encode_distance(distance, length)

  @state.update_match
  @prev_byte = @mf.get_byte(length - 1)
end
340
+
341
+ private
342
+
343
# Find the longest match among the remembered rep distances.
#
# Zero entries are treated as unused and ignored. A candidate must first
# match over MATCH_LEN_MIN bytes before its full length is measured. On ties
# the earliest rep index wins because only strictly longer matches replace
# the current best.
#
# @param buf_avail [Integer] Bytes available
# @return [Array<Integer, Integer>] [best_rep_len, best_rep_index]
def check_rep_matches(buf_avail)
  # Nothing recorded yet: no rep can match.
  return [0, 0] if @reps.none? { |rep_dist| rep_dist != 0 }

  @reps.each_with_index.reduce([0, 0]) do |best, (rep_dist, index)|
    next best if rep_dist.zero?
    next best unless matches_at_distance?(rep_dist, MATCH_LEN_MIN)

    candidate = calculate_match_length(rep_dist, buf_avail)
    candidate > best.first ? [candidate, index] : best
  end
end
373
+
374
# Tell whether the next n bytes repeat the bytes found `distance` back.
#
# Byte i ahead of the current position is compared with the byte at offset
# i - distance, so a distance of 0 would compare each byte with itself.
#
# @param distance [Integer] How far back to look, in bytes
# @param n [Integer] Number of bytes to check
# @return [Boolean] True if matches
def matches_at_distance?(distance, n)
  # Not enough history to look that far back.
  return false if @mf.pos < distance

  (0...n).all? { |i| @mf.get_byte(i) == @mf.get_byte(i - distance) }
end
390
+
391
# Count how many bytes ahead keep repeating the bytes `distance` back.
#
# Counting stops at the first differing byte or at max_len, whichever comes
# first. Returns 0 when the current position is smaller than the distance.
#
# @param distance [Integer] How far back to look, in bytes
# @param max_len [Integer] Maximum length to check
# @return [Integer] Match length
def calculate_match_length(distance, max_len)
  return 0 if @mf.pos < distance

  (0...max_len)
    .take_while { |i| @mf.get_byte(i) == @mf.get_byte(i - distance) }
    .size
end
411
+
412
# Compare a run of bytes starting at `offset` with the run `distance` back.
#
# find_best_match uses this after its lookahead to test rep distances at the
# next position. No bounds check is made here; a distance of 0 compares each
# byte with itself and therefore always succeeds.
#
# @param offset [Integer] Offset from current position
# @param distance [Integer] How far back to look, in bytes
# @param limit [Integer] Number of bytes to compare
# @return [Boolean] True if all bytes match
def memcmp_at_offset(offset, distance, limit)
  (0...limit).all? do |i|
    @mf.get_byte(offset + i) == @mf.get_byte(offset + i - distance)
  end
end
429
+
430
# change_pair heuristic: is the far distance at least 128 times the near one?
#
# True exactly when big_dist shifted right by 7 bits still exceeds
# small_dist, i.e. when switching to the closer distance is considered
# worthwhile.
#
# @param small_dist [Integer] Smaller distance
# @param big_dist [Integer] Larger distance
# @return [Boolean] True if should change to smaller distance
def change_pair?(small_dist, big_dist)
  scaled_far = big_dist >> 7
  small_dist < scaled_far
end
440
+
441
# Compute where a literal's probability group starts in the flat model array.
#
# Mirrors the expression quoted from the XZ Utils literal_subcoder() macro:
#   mask = (0x100 << lp) - (0x100 >> lc)
#   base = 3 * ((((pos << 8) + prev_byte) & mask) << lc)
# so the low lp bits of the position and the high lc bits of the previous
# byte select the group.
#
# @param pos [Integer] Current position
# @param prev_byte [Integer] Previous byte
# @return [Integer] Base index into @models.literal array
def get_literal_base(pos, prev_byte)
  position_span = 0x100 << @lp
  context_span = 0x100 >> @lc
  selected = ((pos << 8) + prev_byte) & (position_span - context_span)

  (selected << @lc) * 3
end
460
+
461
# Queue a literal through the plain 8-bit binary tree, most significant bit
# first.
#
# The tree node starts at 1 and is extended with every coded bit; the model
# used for each bit is @models.literal[literal_base + node].
#
# @param literal_base [Integer] Base index into literal models array
# @param symbol [Integer] Byte value
def encode_normal_literal(literal_base, symbol)
  node = 1
  8.downto(1) do |remaining|
    bit = symbol[remaining - 1]
    @encoder.queue_bit(@models.literal[literal_base + node], bit)
    node = (node * 2) + bit
  end
end
473
+
474
# Queue a literal that directly follows a match, using the match byte as
# extra context.
#
# The symbol is biased by 0x100 and shifted left once per coded bit until it
# reaches 0x10000. While the symbol's bits agree with the match byte's bits
# the mask stays 0x100 and the match bit selects between two model banks;
# after the first disagreement the mask drops to 0 and the remaining bits are
# coded from the plain bank.
#
# @param literal_base [Integer] Base index into literal models array
# @param match_byte [Integer] Byte at match position
# @param symbol [Integer] Byte value to encode
def encode_matched_literal(literal_base, match_byte, symbol)
  mask = 0x100
  reference = match_byte
  pending = symbol + 0x100

  until pending >= 0x10000
    reference <<= 1
    reference_bit = reference & mask
    model_index = mask + reference_bit + (pending >> 8)

    @encoder.queue_bit(@models.literal[literal_base + model_index],
                       (pending >> 7) & 1)

    pending <<= 1
    # Clears the mask as soon as symbol and match byte disagree on a bit.
    mask &= ~(reference ^ pending)
  end
end
496
+
497
# Queue the length of a rep match using the rep-specific length models.
#
# @param length [Integer] Match length (>= 2)
# @param pos_state [Integer] Position state
def encode_rep_length(length, pos_state)
  rep_models = @models.rep_len_encoder
  encode_length(rep_models, length, pos_state)
end
504
+
505
# Queue the length of a normal match using the match-specific length models.
#
# @param length [Integer] Match length (>= 2)
# @param pos_state [Integer] Position state
def encode_match_length(length, pos_state)
  match_models = @models.match_len_encoder
  encode_length(match_models, length, pos_state)
end
512
+
513
# Queue a match length through the three-tier length coder.
#
# The length is first reduced by MATCH_LEN_MIN. Values below LEN_LOW_SYMBOLS
# go to the per-pos_state low tree (choice = 0); the next LEN_MID_SYMBOLS
# values go to the per-pos_state mid tree (choice = 1, choice2 = 0);
# everything else goes to the shared high tree (choice = 1, choice2 = 1).
#
# @param len_encoder [LengthEncoder] Length encoder
# @param length [Integer] Match length (2-273)
# @param pos_state [Integer] Position state
def encode_length(len_encoder, length, pos_state)
  symbol = length - MATCH_LEN_MIN

  if symbol < LEN_LOW_SYMBOLS
    @encoder.queue_bit(len_encoder.choice, 0)
    return encode_bittree(len_encoder.low[pos_state], NUM_LEN_LOW_BITS,
                          symbol)
  end

  # Not in the low tier
  @encoder.queue_bit(len_encoder.choice, 1)
  symbol -= LEN_LOW_SYMBOLS

  if symbol < LEN_MID_SYMBOLS
    @encoder.queue_bit(len_encoder.choice2, 0)
    encode_bittree(len_encoder.mid[pos_state], NUM_LEN_MID_BITS, symbol)
  else
    @encoder.queue_bit(len_encoder.choice2, 1)
    encode_bittree(len_encoder.high, NUM_LEN_HIGH_BITS,
                   symbol - LEN_MID_SYMBOLS)
  end
end
539
+
540
# Queue a match distance: slot first, then whatever footer the slot needs.
#
# The slot is coded with the tree chosen by the match length. Slots below
# START_POS_MODEL_INDEX carry the whole distance. Slots below
# END_POS_MODEL_INDEX code the remainder with the dist_special reverse tree;
# larger slots send the upper remainder bits as direct bits and the lowest
# DIST_ALIGN_BITS through the dist_align reverse tree.
#
# @param distance [Integer] Distance in the form expected by get_dist_slot
# @param length [Integer] Match length
def encode_distance(distance, length)
  slot = get_dist_slot(distance)
  len_state = get_len_to_pos_state(length)

  encode_bittree(@models.dist_slot[len_state], NUM_DIST_SLOT_BITS, slot)

  # Small slots need no footer.
  return if slot < START_POS_MODEL_INDEX

  footer_bits = (slot >> 1) - 1
  slot_base = (2 | (slot & 1)) << footer_bits
  remainder = distance - slot_base

  if slot < END_POS_MODEL_INDEX
    # Entire footer is modelled
    encode_bittree_reverse(@models.dist_special, remainder, footer_bits,
                           slot_base - slot)
  else
    # Upper bits go out unmodelled, the low alignment bits are modelled
    align_mask = (1 << DIST_ALIGN_BITS) - 1
    @encoder.queue_direct_bits(remainder >> DIST_ALIGN_BITS,
                               footer_bits - DIST_ALIGN_BITS)
    encode_bittree_reverse(@models.dist_align, remainder & align_mask,
                           DIST_ALIGN_BITS, 0)
  end
end
572
+
573
# Queue `value` through a binary probability tree, most significant bit first.
#
# The tree node starts at 1; after each bit it becomes node * 2 + bit, and
# probs[node] is the model used for that bit.
#
# @param probs [Array<BitModel>] Probability models
# @param num_bits [Integer] Number of bits
# @param value [Integer] Value to encode
def encode_bittree(probs, num_bits, value)
  node = 1
  num_bits.downto(1) do |remaining|
    bit = value[remaining - 1]
    @encoder.queue_bit(probs[node], bit)
    node = (node * 2) + bit
  end
end
586
+
587
# Queue `value` through a binary probability tree, least significant bit
# first.
#
# Works like encode_bittree but consumes the bits from the low end, and the
# model for each bit is probs[offset + node].
#
# @param probs [Array<BitModel>] Probability models
# @param value [Integer] Value to encode
# @param num_bits [Integer] Number of bits
# @param offset [Integer] Probability array offset
def encode_bittree_reverse(probs, value, num_bits, offset)
  node = 1
  num_bits.times do |shift|
    bit = value[shift]
    @encoder.queue_bit(probs[offset + node], bit)
    node = (node * 2) + bit
  end
end
601
+
602
# Map a distance to its distance slot.
#
# Distances below 4 are their own slot; other distances below
# NUM_FULL_DISTANCES go through fast_pos_small, and anything at or above it
# through fast_pos_large.
#
# @param distance [Integer] Distance as handed to encode_distance
# @return [Integer] Distance slot (0-63)
def get_dist_slot(distance)
  return fast_pos_large(distance) unless distance < NUM_FULL_DISTANCES
  return distance if distance < 4

  fast_pos_small(distance)
end
615
+
616
# Distance slot computed arithmetically.
#
# Values of 3 or less are returned unchanged. Larger values are halved until
# only the two leading bits (2 or 3) remain; each halving adds 2 to the slot
# and the remaining leading bits are added at the end. The number of halvings
# equals bit_length - 2, which gives the closed form used here.
#
# @param distance [Integer] Distance to classify
# @return [Integer] Distance slot
def fast_pos_small(distance)
  return distance unless distance > 3

  halvings = distance.bit_length - 2
  (halvings * 2) + (distance >> halvings)
end
627
+
628
# Distance slot for large distances.
#
# Drops the low 6 bits, classifies the rest with fast_pos_small and adds 12
# (two slots per dropped bit).
#
# @param distance [Integer] Distance to classify
# @return [Integer] Distance slot
def fast_pos_large(distance)
  12 + fast_pos_small(distance >> 6)
end
633
+
634
# Choose which distance-slot tree a match of the given length uses.
#
# The length is reduced by MATCH_LEN_MIN and capped at
# NUM_LEN_TO_POS_STATES - 1.
#
# @param length [Integer] Match length
# @return [Integer] Position state (0-3)
def get_len_to_pos_state(length)
  [length - MATCH_LEN_MIN, NUM_LEN_TO_POS_STATES - 1].min
end
642
+ end
643
+ end
644
+ end
645
+ end