omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,465 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "../../../algorithms/lzma/constants"
24
+ require_relative "match_finder"
25
+ require_relative "../../../algorithms/lzma/match_finder_config"
26
+ require_relative "../../../algorithms/lzma/match_finder_factory"
27
+ require_relative "../../../algorithms/lzma/literal_encoder"
28
+ require_relative "state_machine"
29
+ require_relative "../../../algorithms/lzma/length_coder"
30
+ require_relative "../../../algorithms/lzma/distance_coder"
31
+ require_relative "range_encoder" # Use 7-Zip SDK range encoder (not XZ Utils)
32
+ require_relative "../../../algorithms/lzma/bit_model"
33
+
34
+ module Omnizip
35
+ module Implementations
36
+ module SevenZip
37
+ module LZMA
38
+ # 7-Zip LZMA SDK encoder implementation.
39
+ #
40
+ # This is the original SdkEncoder moved from algorithms/lzma/sdk_encoder.rb
41
+ # to the new namespace structure.
42
+ #
43
+ # Ported from 7-Zip LZMA SDK by Igor Pavlov.
44
+ class Encoder
45
+ include Omnizip::Algorithms::LZMA::Constants
46
+
47
+ attr_reader :lc, :lp, :pb, :dict_size
48
+
49
# Build an encoder that writes 7-Zip SDK style LZMA data to +output+.
#
# Each option is looked up with +fetch+, so a missing key falls back to the
# default listed below. After the settings are stored, the parameters are
# validated and the probability models and sub-coders are created.
#
# @param output [IO] Output stream for compressed data
# @param options [Hash] Encoding options
# @option options [Integer] :lc Literal context bits (0-8, default: 3)
# @option options [Integer] :lp Literal position bits (0-4, default: 0)
# @option options [Integer] :pb Position bits (0-4, default: 2)
# @option options [Integer] :dict_size Dictionary size (default: 64KB)
# @option options [Integer] :level Compression level (0-9, default: 5)
# @option options [Boolean] :raw_mode Skip header and EOS marker for LZMA2 (default: false)
# @raise [ArgumentError] If validate_parameters rejects a setting
def initialize(output, options = {})
  @output = output

  # Option name => value used when the caller did not supply the key.
  defaults = {
    lc: 3,
    lp: 0,
    pb: 2,
    dict_size: 1 << 16, # 64KB
    level: 5,
    raw_mode: false, # true: no header and no EOS marker (LZMA2 chunks)
  }
  settings = defaults.to_h { |key, fallback| [key, options.fetch(key, fallback)] }

  @lc, @lp, @pb = settings.values_at(:lc, :lp, :pb)
  @dict_size = settings[:dict_size]
  @level = settings[:level]
  @raw_mode = settings[:raw_mode]

  validate_parameters
  init_models
  init_coders
end
72
+
73
# Compress +data+ and write the result to the output stream.
#
# Steps:
# 1. Read the input and take a binary-encoded copy of it
# 2. Write the LZMA header (skipped in raw mode)
# 3. Create the range encoder, match finder, state machine and history
# 4. Walk the input, emitting either a match or a literal at each position
# 5. Write the EOS marker (skipped in raw mode) and flush the range encoder
#
# @param data [String, IO] Input data to compress; anything that is not a
#   String is asked for its content via +read+
# @return [Array] A two-element tuple:
#   - raw mode: [output.string, range encoder bytes_for_decode]
#   - output responds to +string+: [output.string, its bytesize]
#   - any other output: [bytes_for_decode, bytes_for_decode]
def encode_stream(data)
  source = data.is_a?(String) ? data : data.read

  # Take a copy before re-tagging so a frozen or caller-owned string is
  # never modified; BINARY makes every index below a byte index.
  source = source.dup.force_encoding(Encoding::BINARY)
  total_bytes = source.bytesize

  write_header(total_bytes) unless @raw_mode

  # 7-Zip SDK range encoder (not the XZ Utils one)
  @range_encoder = RangeEncoder.new(@output)

  finder_config = Omnizip::Algorithms::LZMA::MatchFinderConfig.sdk_config(
    dict_size: @dict_size,
    level: @level,
  )
  @match_finder = Omnizip::Algorithms::LZMA::MatchFinderFactory.create(finder_config)

  # Fresh per-stream state: state machine, history buffer, cursor and the
  # four repeat distances (all 1 initially, as in the SDK).
  @state = StateMachine.new
  @dict = +""
  @pos = 0
  @reps = [1, 1, 1, 1]

  # encode_match / encode_literal advance @pos themselves.
  until @pos >= total_bytes
    candidate = @match_finder.find_longest_match(source, @pos)

    if should_encode_match?(candidate)
      encode_match(candidate, source)
    else
      encode_literal(source.getbyte(@pos), source)
    end
  end

  # LZMA2 signals the end of stream with its own control byte, so raw mode
  # omits the LZMA EOS marker. The flush is always required because it
  # emits the range encoder's pending bytes.
  encode_eos_marker unless @raw_mode
  @range_encoder.flush

  return [@output.string, @range_encoder.bytes_for_decode] if @raw_mode

  if @output.respond_to?(:string)
    # StringIO-like output: hand back the buffer and its size.
    [@output.string, @output.string.bytesize]
  else
    # File-like output: nothing is read back, only the byte count is reported.
    Array.new(2) { @range_encoder.bytes_for_decode }
  end
end
143
+
144
+ private
145
+
146
# Validate encoding parameters
#
# Every setting must be an Integer inside its inclusive range. The type
# check matters because options are read with +fetch+, so an explicit
# +nil+ (or any other non-Integer) can reach this point; it is reported
# as an ArgumentError with the same message as an out-of-range value
# instead of surfacing as a NoMethodError from +between?+.
#
# @return [nil]
# @raise [ArgumentError] If parameters are invalid
def validate_parameters
  # [error message, value, lowest allowed, highest allowed]
  rules = [
    ["lc must be 0-8", @lc, 0, 8],
    ["lp must be 0-4", @lp, 0, 4],
    ["pb must be 0-4", @pb, 0, 4],
    ["level must be 0-9", @level, 0, 9],
    ["Invalid dictionary size", @dict_size, DICT_SIZE_MIN, DICT_SIZE_MAX],
  ]

  # Rules are checked in order, so the first failing setting is reported.
  rules.each do |message, value, lowest, highest|
    next if value.is_a?(Integer) && value.between?(lowest, highest)

    raise ArgumentError, message
  end

  nil
end
159
+
160
# Allocate the probability models used by the encoder.
#
# Literal models use a compact layout addressed by
#   base_offset = 3 * ((((pos << 8) + prev_byte) & literal_mask) << lc)
# with literal_mask = (0x100 << lp) - (0x100 >> lc). The masked context can
# never exceed literal_mask itself, so the largest base offset is
# 3 * (literal_mask << lc). On top of the base offset the literal coder may
# add up to 0x300 (three terms of at most 0x100 each in matched mode), which
# gives the highest index that has to exist.
#
# The match/rep decision tables hold NUM_STATES entries each, except
# is_match and is_rep0_long which hold NUM_STATES * (1 << pb).
#
# @return [void]
def init_models
  literal_mask = (0x100 << @lp) - (0x100 >> @lc)
  highest_literal_index = (3 * (literal_mask << @lc)) + 0x300

  # Every slot gets its own BitModel instance.
  fresh_models = lambda do |count|
    Array.new(count) { Omnizip::Algorithms::LZMA::BitModel.new }
  end
  state_and_pos_slots = NUM_STATES * (1 << @pb)

  @literal_models = fresh_models.call(highest_literal_index + 1)

  @is_match_models = fresh_models.call(state_and_pos_slots)
  @is_rep_models = fresh_models.call(NUM_STATES)
  @is_rep0_models = fresh_models.call(NUM_STATES)
  @is_rep1_models = fresh_models.call(NUM_STATES)
  @is_rep2_models = fresh_models.call(NUM_STATES)
  @is_rep0_long_models = fresh_models.call(state_and_pos_slots)
end
211
+
212
# Create the literal, length and distance coders.
#
# Two independent length coders are built with the same number of position
# states (1 << pb): one for regular matches and one for rep matches.
#
# @return [void]
def init_coders
  lzma = Omnizip::Algorithms::LZMA
  pos_state_count = 1 << @pb

  @literal_encoder = lzma::LiteralEncoder.new(@lc)
  @length_coder, @rep_length_coder = Array.new(2) do
    lzma::LengthCoder.new(pos_state_count)
  end
  @distance_coder = lzma::DistanceCoder.new(NUM_LEN_TO_POS_STATES)
end
221
+
222
# Decide whether a found match is emitted as a match or dropped in favour
# of a literal.
#
# This is a simplified stand-in for the SDK's cost-based heuristics:
# - no match, or a match shorter than MATCH_LEN_MIN, is never encoded
# - a match whose distance is greater than the current position is
#   rejected, because it would point before the start of the data that has
#   already been encoded (the match finder may report such distances)
# - a length-2 match is only worth encoding when its distance is below 128
# - anything longer is always encoded
#
# @param match [MatchFinder::Match, nil] Found match
# @return [Boolean] True if match should be encoded
def should_encode_match?(match)
  return false if match.nil? || match.length < MATCH_LEN_MIN

  # Distances are checked against the current position, so a distance
  # equal to @pos is still accepted.
  return false if match.distance > @pos

  match.length != 2 || match.distance < 128
end
253
+
254
# Encode one byte as a literal.
#
# Sequence:
# 1. Emit is_match = 0 through the model selected by (state, pos_state)
# 2. Determine the previous byte (0 when the history is still empty)
# 3. Encode the byte in matched or unmatched mode
# 4. Advance the state machine, the history buffer and the position
#
# Matched mode is chosen when the state machine asks for it and the
# position is past the first repeat distance. The byte found @reps[0]
# positions back in the history is then passed along as the match byte.
#
# @param byte [Integer] Byte to encode (0-255)
# @param _data [String] Full input data (not used by this method)
# @return [void]
def encode_literal(byte, _data)
  pos_state_count = 1 << @pb
  pos_state = @pos & (pos_state_count - 1)

  # is_match is laid out as [state][pos_state], flattened.
  is_match_model = @is_match_models[(@state.value * pos_state_count) + pos_state]
  @range_encoder.encode_bit(is_match_model, 0)

  prev_byte = @dict.empty? ? 0 : @dict.getbyte(-1)

  # literal_mask = (0x100 << lp) - (0x100 >> lc)
  literal_mask = (0x100 << @lp) - (0x100 >> @lc)

  # Arguments shared by both literal coding modes.
  context_args = [@pos, prev_byte, @lc, literal_mask, @range_encoder, @literal_models]

  if @state.use_matched_literal? && @pos > @reps[0]
    # Indexed as dict[pos - reps[0]], without the extra -1 the SDK formula uses.
    match_byte = @dict[@pos - @reps[0]].ord
    @literal_encoder.encode_matched(byte, match_byte, *context_args)
  else
    @literal_encoder.encode_unmatched(byte, *context_args)
  end

  @state.update_literal
  @dict << byte.chr
  @pos += 1
end
305
+
306
# Encode a regular (non-rep) match.
#
# Sequence:
# 1. Emit is_match = 1 through the model selected by (state, pos_state)
# 2. Emit is_rep = 0 (rep matches are not produced by this encoder)
# 3. Encode the length as (length - MATCH_LEN_MIN)
# 4. Encode the distance as (distance - 1)
# 5. Push the distance onto the front of the repeat-distance history
# 6. Advance the state machine, the history buffer and the position
#
# @param match [MatchFinder::Match] Match to encode
# @param data [String] Full input data; the matched bytes are copied from
#   it into the history buffer
# @return [void]
# @raise [RuntimeError] If the match distance is below 1
def encode_match(match, data)
  raise "Invalid match distance: #{match.distance}" if match.distance < 1

  pos_state_count = 1 << @pb
  pos_state = @pos & (pos_state_count - 1)

  # is_match is laid out as [state][pos_state], flattened.
  is_match_model = @is_match_models[(@state.value * pos_state_count) + pos_state]
  @range_encoder.encode_bit(is_match_model, 1)
  @range_encoder.encode_bit(@is_rep_models[@state.value], 0)

  # The length symbol doubles as the distance-coder state, capped at the
  # last state: len=2 -> 0, len=3 -> 1, len=4 -> 2, len>=5 -> 3.
  length_symbol = match.length - MATCH_LEN_MIN
  len_state = [length_symbol, NUM_LEN_TO_POS_STATES - 1].min

  @length_coder.encode(@range_encoder, length_symbol, pos_state)

  # The distance coder works on (distance - 1).
  @distance_coder.encode(@range_encoder, match.distance - 1, len_state)

  # New distance becomes rep0; the oldest repeat distance is dropped.
  @reps.pop
  @reps.unshift(match.distance)

  @state.update_match

  @dict << data[@pos, match.length]
  @pos += match.length
end
370
+
371
# Emit the end-of-stream marker.
#
# The marker is coded like a regular match of the minimum length whose
# distance symbol is 0xFFFFFFFF; that distance tells the decoder to stop.
# The position state is derived from the real current position rather than
# being fixed at 0.
#
# @return [void]
def encode_eos_marker
  pos_state_count = 1 << @pb
  pos_state = @pos & (pos_state_count - 1)

  # is_match = 1 (flattened [state][pos_state] table), then is_rep = 0.
  is_match_model = @is_match_models[(@state.value * pos_state_count) + pos_state]
  @range_encoder.encode_bit(is_match_model, 1)
  @range_encoder.encode_bit(@is_rep_models[@state.value], 0)

  # Length symbol 0 stands for MATCH_LEN_MIN, and the distance state that
  # belongs to a minimum-length match is 0.
  min_length_symbol = 0
  eos_len_state = 0
  @length_coder.encode(@range_encoder, min_length_symbol, pos_state)

  # Same marker XZ Utils writes: a match with distance UINT32_MAX.
  @distance_coder.encode(@range_encoder, 0xFFFFFFFF, eos_len_state)
end
403
+
404
# Calculate the masked literal context for the current position.
#
# XZ Utils' literal_subcoder macro addresses the literal probabilities as
#   probs + 3 * ((((pos << 8) + prev_byte) & literal_mask) << lc)
# with
#   literal_mask = (0x100 << lp) - (0x100 >> lc)
#
# (pos << 8) + prev_byte is formed FIRST and masked afterwards. This method
# returns that masked value only: it is NOT shifted by lc and NOT
# multiplied by 3.
#
# The mask keeps the top lc bits of prev_byte and the low lp bits of pos.
# For lc=3, lp=0 the mask is 256 - 32 = 0xE0, so the result is a multiple
# of 0x20 in the range 0x00..0xE0.
#
# The previous byte is read with +getbyte+, so it is always a byte value
# (0-255) whatever encoding the history string is tagged with.
#
# @return [Integer] Masked literal context (unshifted)
def calculate_literal_state
  # No history yet: the previous byte counts as 0.
  prev_byte = @dict.empty? ? 0 : @dict.getbyte(-1)

  # Must be the same mask formula the decoder uses.
  literal_mask = (0x100 << @lp) - (0x100 >> @lc)

  ((@pos << 8) + prev_byte) & literal_mask
end
436
+
437
# Write the 13-byte LZMA header to the output stream.
#
# Layout:
# - 1 byte:  properties, lc + lp*9 + pb*45
# - 4 bytes: dictionary size, least significant byte first
# - 8 bytes: uncompressed size, always written as 0xFF bytes (the
#   "unknown size" marker), which is why the argument is ignored
#
# @param _uncompressed_size [Integer] Original data size (not written)
# @return [void]
def write_header(_uncompressed_size)
  properties = @lc + (@lp * 9) + (@pb * 45)

  # Split the dictionary size into its four little-endian bytes.
  dict_size_bytes = [0, 8, 16, 24].map { |shift| (@dict_size >> shift) & 0xFF }

  [properties, *dict_size_bytes].each { |value| @output.putc(value) }

  # Unknown-size marker (0xFFFFFFFFFFFFFFFF), as xz/lzma writes for
  # standalone streams.
  8.times { @output.putc(0xFF) }
end
461
+ end
462
+ end
463
+ end
464
+ end
465
+ end