omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "../../../algorithms/lzma/state"
24
+
25
+ module Omnizip
26
+ module Implementations
27
+ module SevenZip
28
+ module LZMA
29
+ # 7-Zip LZMA SDK state machine implementation.
30
+ #
31
+ # This is the original SdkStateMachine moved from algorithms/lzma/sdk_state_machine.rb
32
+ # to the new namespace structure.
33
+ #
34
+ # Ported from 7-Zip LZMA SDK by Igor Pavlov.
35
+ class StateMachine < Omnizip::Algorithms::LZMA::State
36
+ # State categories (SDK classification)
37
+ CATEGORY_LITERAL = :literal # States 0-6
38
+ CATEGORY_MATCH = :match # States 7, 10
39
+ CATEGORY_REP = :rep # State 8, 11
40
+ CATEGORY_SHORT_REP = :short_rep # State 9
41
+
42
# Report whether the machine is currently in a character (literal) state.
#
# State indices 0-6 are the ones entered after a literal has been coded.
# The original author identifies this check with the SDK's IsCharState()
# macro, used when selecting probability models.
#
# @return [Boolean] true when the state index is 6 or lower
def is_char_state?
  @index <= 6
end
52
+
53
# Expose the raw state index under the name +value+.
#
# @return [Integer] the index currently held by this state machine
def value
  @index
end
59
+
60
# Map the current state to its reduced literal state index.
#
# Mapping implemented by this method:
# - States 0-3 map to themselves (0-3)
# - States 4-9 map to state - 3 (1-6)
# - States 10-11 map to state - 6 (4-5)
#
# For the twelve LZMA states (0-11) the result is therefore always in 0..6.
# Equivalent C expression:
#   (state < 4) ? state : (state - (state < 10 ? 3 : 6))
#
# @return [Integer] Reduced literal state index (0-6)
def literal_state
  return @index if @index < 4

  @index - (@index < 10 ? 3 : 6)
end
80
+
81
# Decide whether the next literal must be coded in "matched" mode.
#
# Follows the XZ Utils rule cited by the original author
# (lzma_decoder.c / lzma_common.h):
# - States 0-6 are literal states and use the plain (unmatched) literal coder
# - States 7-11 follow a match or rep and use the matched literal coder
#
# @return [Boolean] true when the state index is 7 or higher
def use_matched_literal?
  @index > 6
end
94
+
95
# Classify the current state index into one of the CATEGORY_* symbols.
#
# Mapping:
# - 0-6   -> CATEGORY_LITERAL
# - 7, 10 -> CATEGORY_MATCH
# - 8, 11 -> CATEGORY_REP
# - 9     -> CATEGORY_SHORT_REP
#
# According to the original author this grouping is not exposed by the SDK
# itself; it exists for debugging and encoder decisions.
#
# @return [Symbol] State category
# @raise [RuntimeError] when the index is outside 0..11
def category
  return CATEGORY_LITERAL if (0..6).cover?(@index)
  return CATEGORY_MATCH if [7, 10].include?(@index)
  return CATEGORY_REP if [8, 11].include?(@index)
  return CATEGORY_SHORT_REP if @index == 9

  raise "Invalid state: #{@index}"
end
116
+
117
# Produce an independent copy of this state machine.
#
# Redefined here so the copy is a StateMachine built from the current
# index (only the index is passed to the constructor).
#
# @return [StateMachine] A new StateMachine with the same index
def dup
  StateMachine.new(@index)
end
125
+
126
# Predict whether a literal coded right after a match would be "matched".
#
# Looks up the state that a match would transition to (MATCH_STATES[@index])
# and reports whether that next state is a non-literal state (>= 7).
#
# @return [Boolean] True if state would transition to matched literal state
def would_use_matched_literal?
  MATCH_STATES[@index] >= 7
end
137
+ end
138
+ end
139
+ end
140
+ end
141
+ end
@@ -0,0 +1,519 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require "stringio"
24
+ require_relative "../../base/lzma2_encoder_base"
25
+ require_relative "../../../algorithms/lzma2/constants"
26
+ require_relative "../../../algorithms/lzma2/lzma2_chunk"
27
+
28
+ module Omnizip
29
+ module Implementations
30
+ module SevenZip
31
+ module LZMA2
32
+ # 7-Zip SDK LZMA2 encoder implementation.
33
+ #
34
+ # This encoder produces LZMA2 compressed data compatible with 7-Zip format.
35
+ # It uses the same LZMA encoding logic as XZ Utils, but with 7-Zip
36
+ # format requirements (no EOS marker, no padding).
37
+ #
38
+ # Key differences from XZ Utils implementation:
39
+ # - No EOS marker (raw LZMA2 data ends with 0x00 control byte)
40
+ # - No chunk padding (XZ pads to 4-byte boundary)
41
+ # - No LZMA2 property byte in data stream (method ID only in container)
42
+ #
43
+ # Based on LZMA SDK by Igor Pavlov
44
+ # Reference: https://www.7-zip.org/sdk.html
45
+ #
46
+ # LZMA2 format (as used by 7-Zip):
47
+ # - Control byte specifies chunk type and dictionary reset
48
+ # - LZMA properties byte (lc/lp/pb) follows in some chunk types
49
+ # - Uncompressed size follows in some chunk types
50
+ # - Compressed data follows
51
+ class Encoder < Base::LZMA2EncoderBase
52
+ include Omnizip::Algorithms::LZMA2Const
53
+
54
+ # Maximum chunk sizes (from LZMA2 specification)
55
+ MAX_UNCOMPRESSED_CHUNK = 2 * 1024 * 1024 # 2MB
56
+ MAX_COMPRESSED_CHUNK = 64 * 1024 # 64KB
57
+
58
+ # Encoding constants
59
+ UINT32_MAX = 0xFFFFFFFF
60
+ REPS = 4
61
+ MATCH_LEN_MIN = 2
62
+
63
+ attr_reader :dict_size, :lc, :lp, :pb, :standalone
64
+
65
# Build a 7-Zip flavoured LZMA2 encoder.
#
# All arguments are forwarded unchanged to the base class through bare
# +super+. The LZMA components shared by every chunk are then created once.
#
# @param dict_size [Integer] Dictionary size (must be power of 2)
# @param lc [Integer] Literal context bits (0-8)
# @param lp [Integer] Literal position bits (0-4)
# @param pb [Integer] Position bits (0-4)
# @param standalone [Boolean] Include property byte (false for 7-Zip)
def initialize(dict_size:, lc: 3, lp: 0, pb: 2, standalone: false)
  super

  # The LZMA building blocks are required here rather than at file load;
  # the original author notes these are the XZ Utils components.
  require_relative "../../../algorithms/lzma/dictionary"
  require_relative "../../../algorithms/lzma/lzma_state"
  require_relative "../../../algorithms/lzma/xz_probability_models"
  require_relative "../../../algorithms/lzma/match_finder"
  require_relative "../../../algorithms/lzma/optimal_encoder"
  require_relative "../../../algorithms/lzma/xz_range_encoder_exact"

  lzma = Omnizip::Algorithms::LZMA

  # State that persists across all chunks of a stream.
  @dictionary = lzma::Dictionary.new(dict_size)
  @state = lzma::LZMAState.new(0)
  @models = lzma::XzProbabilityModels.new(lc, lp, pb)
  @match_finder = lzma::MatchFinder.new(@dictionary)
  @optimal = lzma::OptimalEncoder.new(mode: :fast)

  # Last byte emitted; feeds the literal context of the next literal.
  @prev_byte = 0

  # The first chunk always announces a dictionary reset and new properties.
  @need_dictionary_reset = true
  @need_state_reset = false
  @need_properties = true
end
98
+
99
# Encode data with LZMA2 compression
#
# The input is consumed in slices of MAX_UNCOMPRESSED_CHUNK bytes; each
# slice is turned into one chunk by #encode_chunk and the stream is closed
# with the end chunk (0x00). In standalone mode the dictionary-size
# property byte is written first. Empty input yields an empty string.
#
# The caller's string is never modified: reading happens from a binary
# copy, so neither its encoding tag nor a frozen state is touched.
#
# @param data [String] Input data to compress
# @return [String] LZMA2 compressed data (7-Zip format), binary encoded
def encode(data)
  return "" if data.empty?

  output = StringIO.new
  output.set_encoding(Encoding::BINARY)

  # Property byte is only part of the stream in standalone mode.
  output.putc(encode_dict_size(@dict_size)) if @standalone

  # Each encoding session starts with a fresh match finder.
  @match_finder.reset

  # Wrap a binary copy instead of calling set_encoding on a StringIO that
  # wraps +data+ itself, which would retag the caller's string.
  input = StringIO.new(data.b)

  until input.eof?
    chunk_data = input.read(MAX_UNCOMPRESSED_CHUNK)
    break if chunk_data.nil? || chunk_data.empty?

    output.write(encode_chunk(chunk_data))

    # Only the first chunk carries the initial reset/property flags.
    # NOTE(review): this also clears the @need_state_reset that
    # #encode_chunk raises after an uncompressed chunk - confirm intended.
    @need_dictionary_reset = false
    @need_state_reset = false
    @need_properties = false
  end

  # End of stream marker (0x00)
  output.write(Omnizip::Algorithms::LZMA2::LZMA2Chunk.end_chunk.to_bytes)

  output.string
end
140
+
141
# Identify which encoder implementation this class provides.
#
# @return [Symbol] always :seven_zip_sdk
def implementation_name
  :seven_zip_sdk
end
147
+
148
+ private
149
+
150
# Turn one slice of input into the bytes of a single LZMA2 chunk.
#
# The slice is always run through #try_compress first. The compressed form
# is kept only when it is strictly smaller than the input; otherwise the
# slice is stored as an uncompressed chunk and @need_state_reset is raised.
# In both cases the slice is appended to the dictionary and its final byte
# becomes @prev_byte.
#
# NOTE(review): LZMA2 size fields limit an uncompressed chunk and the
# compressed payload of a compressed chunk to 64 KiB each, while slices of
# up to MAX_UNCOMPRESSED_CHUNK arrive here - confirm LZMA2Chunk copes.
#
# @param uncompressed_data [String] raw bytes for this chunk
# @return the result of LZMA2Chunk#to_bytes for the chosen chunk
def encode_chunk(uncompressed_data)
  packed = try_compress(uncompressed_data)
  chunk_class = Omnizip::Algorithms::LZMA2::LZMA2Chunk

  chunk =
    if packed.bytesize < uncompressed_data.bytesize
      # Compression paid off: emit an LZMA chunk with its properties byte.
      chunk_class.new(
        chunk_type: :compressed,
        uncompressed_data: uncompressed_data,
        compressed_data: packed,
        compressed_size: packed.bytesize,
        properties: (((@pb * 5) + @lp) * 9) + @lc,
        need_dict_reset: @need_dictionary_reset,
        need_state_reset: @need_state_reset,
        need_props: true,
      )
    else
      # No gain: store the bytes verbatim.
      stored = chunk_class.new(
        chunk_type: :uncompressed,
        uncompressed_data: uncompressed_data,
        compressed_data: "",
        need_dict_reset: @need_dictionary_reset,
        need_state_reset: false,
        need_props: false,
      )
      # After uncompressed chunk, next chunk needs state reset
      @need_state_reset = true
      stored
    end

  # Later chunks may reference this data.
  @dictionary.append(uncompressed_data)

  # Carry the literal context over to the next chunk.
  @prev_byte = uncompressed_data.getbyte(-1) unless uncompressed_data.empty?

  chunk.to_bytes
end
195
+
196
# Run the LZMA coder over one slice and return the range-coded bytes.
#
# The slice is fed to the match finder up front, then walked position by
# position. At each position the optimal encoder picks a literal, a
# repeated match or a new match. Queued range-coder symbols are drained
# whenever more than 20 are pending, and once more after the final flush.
#
# Side effects: mutates the match finder, @state, @models, @prev_byte and
# sets @current_start_pos.
#
# @param data [String] bytes of the current chunk
# @return [String] range-coded payload (binary)
def try_compress(data)
  sink = StringIO.new
  sink.set_encoding(Encoding::BINARY)

  encoder = Omnizip::Algorithms::LZMA::XzRangeEncoder.new(sink)

  # The match finder receives the whole slice before any lookup.
  @match_finder.feed(data)

  # Described by the original author as hash-table initialisation: skip up
  # to the last position that still leaves room for a 2-byte match.
  min_match = 2
  skip_until = @dictionary.buffer.bytesize + data.bytesize - min_match
  @match_finder.skip(skip_until.negative? ? 0 : skip_until)

  # Positions handed to the match finder are offset by the bytes already
  # held in the dictionary buffer.
  base = @dictionary.buffer.bytesize
  @current_start_pos = base

  offset = 0
  total = data.bytesize
  while offset < total
    # Keep the pending-symbol queue short.
    encode_queued_symbols(encoder, sink) if encoder.count > 20

    absolute = base + offset
    @match_finder.find_matches(absolute)

    distance, length = @optimal.find_optimal(
      absolute, @match_finder, @state, @state.reps, @models
    )

    if distance == UINT32_MAX || length == 1
      # No usable match: emit the byte itself.
      encode_literal(data.getbyte(offset), encoder, offset)
      offset += 1
    elsif distance < REPS
      # Values below REPS select one of the recent distances.
      encode_repeated_match(distance, length, encoder, offset, absolute)
      offset += length
    else
      # Remaining values carry the distance shifted up by REPS.
      encode_match(distance - REPS, length, encoder, offset, absolute, data)
      offset += length
    end
  end

  # Drain, flush the range coder, then drain what the flush produced.
  encode_queued_symbols(encoder, sink)
  encoder.queue_flush
  encode_queued_symbols(encoder, sink)

  sink.string
end
260
+
261
# Drain the range coder's pending symbols into +output+.
#
# Symbols are encoded into a scratch buffer of 10000 bytes and only the
# bytes actually produced are appended to +output+.
#
# @param encoder [#none?, #encode_symbols] range encoder holding the queue
# @param output [StringIO] destination stream
# @return [Integer, nil] growth of +output+ in bytes, or nil when nothing
#   was queued
def encode_queued_symbols(encoder, output)
  return if encoder.none?

  capacity = 10000
  scratch = "\0" * capacity
  produced = Omnizip::Algorithms::LZMA::IntRef.new(0)
  start_size = output.size

  encoder.encode_symbols(scratch, produced, capacity)

  byte_count = produced.value
  output.write(StringCompat.byteslice(scratch, 0, byte_count)) if byte_count.positive?

  output.size - start_size
end
278
+
279
# Byte-slicing shim.
#
# Delegates to String#byteslice when the running Ruby provides it and
# otherwise falls back to slicing the byte array and re-packing it (the
# fallback returns "" where the slice would be nil).
module StringCompat
  class << self
    if String.method_defined?(:byteslice)
      # @param string [String] source string
      # @param start [Integer] byte offset
      # @param length [Integer] number of bytes
      # @return [String, nil] the requested bytes
      def byteslice(string, start, length)
        string.byteslice(start, length)
      end
    else
      # @param string [String] source string
      # @param start [Integer] byte offset
      # @param length [Integer] number of bytes
      # @return [String] the requested bytes ("" when out of range)
      def byteslice(string, start, length)
        string.bytes[start, length]&.pack("C*") || ""
      end
    end
  end
end
291
+
292
# Queue the range-coder bits for a single literal byte.
#
# Emits is_match = 0 and then codes the byte with either the plain or the
# matched literal coder. Matched mode is chosen from the state *before* it
# is advanced with update_literal!, and needs the byte found rep0 + 1
# positions back in the match finder buffer; when that position is out of
# range the plain coder is used instead.
#
# @param symbol [Integer] byte value to encode (0-255)
# @param encoder [#queue_bit] range encoder
# @param pos [Integer] offset of the byte within the current chunk
# @return [Integer] the symbol, which also becomes @prev_byte
def encode_literal(symbol, encoder, pos)
  pos_state = pos & ((1 << @pb) - 1)
  encoder.queue_bit(@models.is_match[@state.value][pos_state], 0)

  literal_offset = get_literal_state(pos, @prev_byte)
  matched_mode = @state.use_matched_literal?

  @state.update_literal!

  match_byte = nil
  if matched_mode
    source_index = @current_start_pos + pos - @state.reps[0] - 1
    buffer = @match_finder.buffer
    if source_index >= 0 && source_index < buffer.bytesize
      match_byte = buffer.getbyte(source_index)
    end
  end

  if match_byte.nil?
    encode_normal_literal(literal_offset, symbol, encoder)
  else
    encode_matched_literal(literal_offset, match_byte, symbol, encoder)
  end

  @prev_byte = symbol
end
320
+
321
# Queue the range-coder bits for a new (non-repeated) match.
#
# Emits is_match = 1 and is_rep = 0, advances the state with the new
# distance, then codes the length followed by the distance.
#
# Afterwards @prev_byte is set to the last byte covered by the match, read
# at match_pos + length - 1 in the match finder buffer. That is the byte
# preceding the next symbol regardless of how +distance+ is based.
#
# @param distance [Integer] match distance as passed to update_match! and
#   #encode_distance
# @param length [Integer] match length
# @param encoder [#queue_bit] range encoder
# @param pos [Integer] offset within the current chunk (selects pos_state)
# @param match_pos [Integer] position of the match start in the match
#   finder buffer
# @param _input_data [String] unused; kept for interface compatibility
# @return [Integer, nil] the new @prev_byte, or nil when left unchanged
def encode_match(distance, length, encoder, pos, match_pos, _input_data)
  pos_state = pos & ((1 << @pb) - 1)

  encoder.queue_bit(@models.is_match[@state.value][pos_state], 1)
  encoder.queue_bit(@models.is_rep[@state.value], 0)

  @state.update_match!(distance)

  encode_match_length(length, pos_state, encoder)
  encode_distance(distance, length, encoder)

  # Literal context for the next symbol: the final byte of this match.
  last_byte_pos = match_pos + length - 1
  buffer = @match_finder.buffer
  @prev_byte = buffer.getbyte(last_byte_pos) if last_byte_pos >= 0 && last_byte_pos < buffer.bytesize
end
339
+
340
# Queue the range-coder bits for a match that reuses a recent distance.
#
# Emits is_match = 1 and is_rep = 1, then selects the rep slot:
# - rep 0: is_rep0 = 0, followed by is_rep0_long (0 for a one-byte short
#   rep, 1 otherwise); the distance history is left as it is
# - rep 1-3: is_rep0 = 1 plus is_rep1 / is_rep2 bits; the chosen distance
#   moves to the front of the history and the entries before it shift down
#
# The chosen distance is read *before* the history is rotated, so the
# value promoted to reps[0] is the one that was stored in slot +rep+.
#
# Afterwards @prev_byte is set to the last byte covered by the match, read
# at match_pos + length - 1 in the match finder buffer.
#
# NOTE(review): the length is coded through #encode_match_length, which
# uses @models.match_len_encoder; LZMA normally keeps a separate length
# coder for rep matches - confirm against the probability model class.
#
# @param rep [Integer] index of the repeated distance (0-3)
# @param length [Integer] match length (1 selects a short rep)
# @param encoder [#queue_bit] range encoder
# @param pos [Integer] offset within the current chunk (selects pos_state)
# @param match_pos [Integer] position of the match start in the match
#   finder buffer
# @return [Integer, nil] the new @prev_byte, or nil when left unchanged
# @raise [RuntimeError] when the selected history slot holds nil
def encode_repeated_match(rep, length, encoder, pos, match_pos)
  pos_state = pos & ((1 << @pb) - 1)

  encoder.queue_bit(@models.is_match[@state.value][pos_state], 1)
  encoder.queue_bit(@models.is_rep[@state.value], 1)

  prob_is_rep0 = @models.is_rep0[@state.value]
  if rep.zero?
    encoder.queue_bit(prob_is_rep0, 0)
    encoder.queue_bit(@models.is_rep0_long[@state.value][pos_state], length == 1 ? 0 : 1)
  else
    # Must be captured before the slots below are overwritten.
    distance = @state.reps[rep]
    if distance.nil?
      raise "Distance is nil for rep #{rep}, reps=#{@state.reps.inspect}"
    end

    encoder.queue_bit(prob_is_rep0, 1)

    prob_is_rep1 = @models.is_rep1[@state.value]
    if rep == 1
      encoder.queue_bit(prob_is_rep1, 0)
    else
      encoder.queue_bit(prob_is_rep1, 1)
      encoder.queue_bit(@models.is_rep2[@state.value], rep - 2)

      @state.reps[3] = @state.reps[2] if rep == 3
      @state.reps[2] = @state.reps[1]
    end

    @state.reps[1] = @state.reps[0]
    @state.reps[0] = distance
  end

  if length == 1
    @state.update_short_rep!
  else
    encode_match_length(length, pos_state, encoder)
    @state.update_long_rep!
  end

  # Literal context for the next symbol: the final byte of this match.
  last_byte_pos = match_pos + length - 1
  buffer = @match_finder.buffer
  @prev_byte = buffer.getbyte(last_byte_pos) if last_byte_pos >= 0 && last_byte_pos < buffer.bytesize
end
396
+
397
# Compute the base offset into @models.literal for a literal.
#
# Combines the low @lp bits of the position with the high @lc bits of the
# previous byte and shifts the result left by @lc.
#
# NOTE(review): XZ Utils' literal_subcoder() additionally multiplies this
# value by 3 (each context owning 0x300 probabilities) - confirm the layout
# of @models.literal matches the offset computed here.
#
# @param pos [Integer] position of the literal
# @param prev_byte [Integer] byte preceding the literal (0-255)
# @return [Integer] offset added to the literal coder's context index
def get_literal_state(pos, prev_byte)
  mask = (0x100 << @lp) - (0x100 >> @lc)
  combined = (pos << 8) + prev_byte
  (combined & mask) << @lc
end
401
+
402
# Code a literal byte bit by bit, most significant bit first.
#
# Each bit is queued against the probability at literal_offset + node,
# where +node+ starts at 1 and accumulates the bits already emitted
# (a binary tree walk over 8 levels).
#
# @param literal_offset [Integer] base offset from #get_literal_state
# @param symbol [Integer] byte value to encode (0-255)
# @param encoder [#queue_bit] range encoder
# @return [Integer] 8 (result of the iteration)
def encode_normal_literal(literal_offset, symbol, encoder)
  node = 1
  8.downto(1) do |level|
    bit = (symbol >> (level - 1)) & 1
    probability = @models.literal[literal_offset + node]
    encoder.queue_bit(probability, bit)
    node = (node << 1) | bit
  end
end
410
+
411
# Code a literal byte using the byte at the rep0 distance as extra context.
#
# Works through the 8 bits of +symbol+ from the top. While the bits emitted
# so far agree with the corresponding bits of +match_byte+, +mask+ stays at
# 0x100 and the match bit selects between the 0x100 and 0x200 regions of
# the subcoder; after the first disagreement +mask+ becomes 0 and only the
# region starting at 0 is used.
#
# @param literal_offset [Integer] base offset from #get_literal_state
# @param match_byte [Integer] byte found at the rep0 distance (0-255)
# @param symbol [Integer] byte value to encode (0-255)
# @param encoder [#queue_bit] range encoder
# @return [nil]
def encode_matched_literal(literal_offset, match_byte, symbol, encoder)
  mask = 0x100
  # A sentinel bit above the byte marks when all 8 bits have been shifted out.
  symbol += 0x100

  until symbol >= 0x10000
    match_byte <<= 1
    match_bit = match_byte & mask
    index = mask + match_bit + (symbol >> 8)
    bit = (symbol >> 7) & 1

    encoder.queue_bit(@models.literal[literal_offset + index], bit)

    symbol <<= 1
    mask &= ~(match_byte ^ symbol)
  end
end
427
+
428
# Code a match length with the three-tier length coder.
#
# The length is first reduced by 2 and then routed by size:
# - 0-7:  choice = 0, 3-bit tree from +low+ for this pos_state
# - 8-15: choice = 1, choice2 = 0, 3-bit tree from +mid+ for this pos_state
# - 16+:  choice = 1, choice2 = 1, 8-bit tree from +high+
#
# @param length [Integer] match length (2 or more)
# @param pos_state [Integer] position state selecting the low/mid tree
# @param encoder [#queue_bit] range encoder
# @return the result of the final #encode_bittree call
def encode_match_length(length, pos_state, encoder)
  coder = @models.match_len_encoder
  reduced = length - 2

  if reduced < 8
    encoder.queue_bit(coder.choice, 0)
    return encode_bittree(coder.low[pos_state], 3, reduced, encoder)
  end

  encoder.queue_bit(coder.choice, 1)
  if reduced < 16
    encoder.queue_bit(coder.choice2, 0)
    encode_bittree(coder.mid[pos_state], 3, reduced - 8, encoder)
  else
    encoder.queue_bit(coder.choice2, 1)
    encode_bittree(coder.high, 8, reduced - 16, encoder)
  end
end
444
+
445
# Code a match distance.
#
# The distance slot is coded with a 6-bit tree chosen by the length
# (length - 2, capped at 3). Slots below 4 are complete at that point.
# For larger slots the remainder above the slot's base value follows:
# - slots 4-13: a reverse bit tree over +dist_special+
# - slots 14+:  the upper bits as direct bits, then the lowest 4 bits
#   through a reverse bit tree over +dist_align+
#
# @param distance [Integer] distance value to encode
# @param length [Integer] length of the match this distance belongs to
# @param encoder [#queue_bit, #queue_direct_bits] range encoder
# @return the result of the last coding call, or nil for slots below 4
def encode_distance(distance, length, encoder)
  slot = get_dist_slot(distance)

  len_state = length - 2
  len_state = 3 if len_state > 3
  encode_bittree(@models.dist_slot[len_state], 6, slot, encoder)

  return nil if slot < 4

  footer_bits = (slot >> 1) - 1
  base = (2 | (slot & 1)) << footer_bits
  remainder = distance - base

  if slot < 14
    encode_bittree_reverse(@models.dist_special, remainder, footer_bits, base - slot - 1, encoder)
  else
    # The lowest 4 bits are modelled; everything above goes out raw.
    encoder.queue_direct_bits(remainder >> 4, footer_bits - 4)
    encode_bittree_reverse(@models.dist_align, remainder & 0xF, 4, 0, encoder)
  end
end
466
+
467
# Code +value+ through a binary probability tree, highest bit first.
#
# +node+ starts at 1 and is extended with every emitted bit, so each bit
# is coded against the probability selected by the bits before it.
#
# @param probs [#[]] probabilities indexed by tree node
# @param num_bits [Integer] number of bits of +value+ to code
# @param value [Integer] value to encode
# @param encoder [#queue_bit] range encoder
# @return [Integer] +num_bits+ (result of the iteration)
def encode_bittree(probs, num_bits, value, encoder)
  node = 1
  num_bits.downto(1) do |level|
    bit = (value >> (level - 1)) & 1
    encoder.queue_bit(probs[node], bit)
    node = (node << 1) | bit
  end
end
475
+
476
# Code +value+ through a binary probability tree, lowest bit first.
#
# Same tree walk as #encode_bittree but consuming bits from the least
# significant end; +offset+ is added to every node index so several trees
# can share one probability array.
#
# @param probs [#[]] probabilities indexed by offset + tree node
# @param value [Integer] value to encode
# @param num_bits [Integer] number of bits of +value+ to code
# @param offset [Integer] base index of this tree inside +probs+
# @param encoder [#queue_bit] range encoder
# @return [Integer] +num_bits+ (result of the iteration)
def encode_bittree_reverse(probs, value, num_bits, offset, encoder)
  node = 1
  num_bits.times do |shift|
    bit = (value >> shift) & 1
    encoder.queue_bit(probs[offset + node], bit)
    node = (node << 1) | bit
  end
end
484
+
485
# Compute the distance slot for a distance.
#
# Distances below 4 are their own slot. For larger values the slot is
# twice the index of the highest set bit plus the bit directly below it,
# i.e. two slots per power of two.
#
# @param distance [Integer] distance value
# @return [Integer] distance slot
def get_dist_slot(distance)
  return distance if distance < 4

  top_bit = distance.bit_length - 1
  (top_bit * 2) + ((distance >> (top_bit - 1)) & 1)
end
498
+
499
# Encode a dictionary size as the LZMA2 property byte.
#
# LZMA2 can only express sizes of the form 2^n or 2^n + 2^(n-1); byte +b+
# stands for (2 | (b & 1)) << (b / 2 + 11), and 40 is the largest value.
# The requested size is first raised to DICT_SIZE_MIN and then rounded *up*
# to the nearest expressible size, so the advertised dictionary is never
# smaller than the one requested.
#
# @param dict_size [Integer] requested dictionary size in bytes
# @return [Integer] property byte (at most 40)
def encode_dict_size(dict_size)
  size = [dict_size, DICT_SIZE_MIN].max

  exponent = size.bit_length - 1
  power = 1 << exponent
  code = (exponent - 12) * 2

  if size > power
    # Above 2^n: use 2^n + 2^(n-1) if it is large enough, else 2^(n+1).
    code += size <= power + (power >> 1) ? 1 : 2
  end

  [code, 40].min
end
515
+ end
516
+ end
517
+ end
518
+ end
519
+ end