omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ module Omnizip
24
+ module Algorithms
25
+ class LZMA < Algorithm
26
# Range encoder ported from XZ Utils (liblzma rangecoder/range_encoder.h).
#
# Implements the binary range coder used by LZMA. The encoder keeps an
# interval [low, low + range) that is narrowed for every encoded bit.
# Whenever +range+ drops below TOP_VALUE the interval is rescaled by one
# byte and the top byte of +low+ is moved towards the output.
#
# The top byte cannot be written immediately because a later carry out
# of +low+ may still increment it. It is therefore held in +cache+, and
# +cache_size+ counts the cached byte plus the run of 0xFF bytes that
# follows it (those would all roll over on a carry).
class XZRangeEncoder
  TOP_VALUE = 1 << 24
  SHIFT_BITS = 8
  BIT_MODEL_TOTAL_BITS = 11
  BIT_MODEL_TOTAL = 1 << BIT_MODEL_TOTAL_BITS

  # Number of shift_low steps needed to push out every significant
  # byte of +low+ when finishing a stream (XZ Utils rc_flush).
  FLUSH_BYTES = 5

  attr_reader :cache, :range, :low

  # Initialize a new range encoder.
  #
  # @param output [IO] Output stream for compressed data; only
  #   +putc(Integer)+ is called on it
  def initialize(output)
    @output = output
    @low = 0
    @range = 0xFFFFFFFF
    @cache = 0
    # Starts at 1 so that the very first emitted byte is the initial
    # cache value (0x00).
    @cache_size = 1
  end

  # Encode a single bit using an adaptive probability model.
  #
  # The model is updated after the bit has been coded.
  #
  # @param model [BitModel] Object responding to +probability+ and
  #   +update(bit)+
  # @param bit [Integer] Bit value to encode (0 or 1)
  # @return [void]
  def encode_bit(model, bit)
    bound = (@range >> BIT_MODEL_TOTAL_BITS) * model.probability

    if bit.zero?
      @range = bound
    else
      @low += bound
      @range -= bound
    end

    normalize!
    model.update(bit)
  end

  # Encode +num_bits+ bits of +value+ as a bittree, most significant
  # bit first.
  #
  # The node index starts at 1 and follows the encoded bits; node +i+
  # uses +models[i - 1]+.
  #
  # @param models [Array<BitModel>] Probability models for tree nodes
  # @param num_bits [Integer] Number of bits to encode
  # @param value [Integer] Value to encode
  # @return [void]
  def encode_bittree(models, num_bits, value)
    index = 1

    (num_bits - 1).downto(0) do |shift|
      bit_value = (value >> shift) & 1
      encode_bit(models[index - 1], bit_value)
      index = (index << 1) | bit_value
    end

    nil
  end

  # Encode +num_bits+ bits of +value+ as a bittree, least significant
  # bit first.
  #
  # @param models [Array<BitModel>] Probability models for tree nodes
  # @param num_bits [Integer] Number of bits to encode
  # @param value [Integer] Value to encode
  # @return [void]
  def encode_bittree_reverse(models, num_bits, value)
    index = 1

    num_bits.times do |shift|
      bit_value = (value >> shift) & 1
      encode_bit(models[index - 1], bit_value)
      index = (index << 1) | bit_value
    end

    nil
  end

  # Encode one bit with a fixed 0.5 probability (no model involved).
  #
  # @param value [Integer] Bit to encode; any non-zero value counts as 1
  # @return [void]
  def encode_direct(value)
    @range >>= 1
    @low += @range if value != 0
    normalize!
  end

  # Flush the encoder state to the output stream.
  #
  # Performs FLUSH_BYTES byte shifts, as XZ Utils rc_flush does, so
  # that every significant byte of +low+ reaches the output. Must be
  # called once after the last symbol has been encoded.
  #
  # @return [void]
  def flush!
    FLUSH_BYTES.times { shift_low }
    nil
  end

  private

  # Rescale the interval by one byte when +range+ has dropped below
  # TOP_VALUE. A no-op otherwise.
  #
  # @return [void]
  def normalize!
    return unless @range < TOP_VALUE

    @range <<= SHIFT_BITS
    shift_low
  end

  # Move the top byte of +low+ towards the output, resolving carries.
  #
  # Mirrors XZ Utils rc_shift_low:
  #
  #   if ((uint32_t)low < 0xFF000000 || (uint32_t)(low >> 32) != 0) {
  #       write cache + carry, then (cache_size - 1) times 0xFF + carry
  #       cache = (low >> 24) & 0xFF; cache_size = 0
  #   }
  #   ++cache_size;
  #   low = (low & 0x00FFFFFF) << RC_SHIFT_BITS;
  #
  # @return [void]
  def shift_low
    low32 = @low & 0xFFFFFFFF
    carry = (@low >> 32) & 0xFF

    if low32 < 0xFF000000 || carry != 0
      # The cached byte can no longer change: emit it followed by the
      # pending 0xFF run, each incremented by the carry (mod 256).
      pending = @cache
      @cache_size.times do
        @output.putc((pending + carry) & 0xFF)
        pending = 0xFF
      end
      @cache_size = 0
      @cache = (low32 >> 24) & 0xFF
    end

    # Counts either the freshly cached byte or one more pending 0xFF.
    # This must happen on both paths, otherwise the cached byte would
    # never be written.
    @cache_size += 1
    @low = (low32 & 0x00FFFFFF) << SHIFT_BITS
  end
end
221
+ end
222
+ end
223
+ end
@@ -0,0 +1,331 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Direct port of XZ Utils range encoder to Ruby
6
+ # Based on: xz/src/liblzma/rangecoder/range_encoder.h
7
+
8
+ require_relative "constants"
9
+
10
+ module Omnizip
11
+ module Algorithms
12
+ class LZMA < Algorithm
13
# XZ Utils-compatible range encoder (direct port of
# xz/src/liblzma/rangecoder/range_encoder.h).
#
# Encoding is a two-step process. Symbols are first queued with #bit,
# #bittree, #bittree_reverse, #direct and #flush. #encode then turns the
# queued symbols into bytes. #encode may stop early when the output
# buffer is full and continues from the same symbol on the next call.
class XzRangeEncoder
  include Constants

  # Range encoder constants (matching XZ Utils range_common.h)
  SHIFT_BITS = 8 # RC_SHIFT_BITS
  TOP_BITS = 24 # RC_TOP_BITS
  TOP = 0x01000000 # 2^24
  BIT_MODEL_TOTAL_BITS = 11
  BIT_MODEL_TOTAL = 2048 # 2^11

  # Symbol types (matching XZ Utils enum)
  RC_BIT_0 = 0
  RC_BIT_1 = 1
  RC_DIRECT_0 = 2
  RC_DIRECT_1 = 3
  RC_FLUSH = 4

  # Maximum symbols that can be queued
  RC_SYMBOLS_MAX = 53

  attr_reader :out_total, :count, :low, :range, :cache, :cache_size

  # Initialize the range encoder.
  #
  # @param output_stream [IO] The output stream for encoded bytes
  def initialize(output_stream)
    @stream = output_stream
    @out_total = 0
    reset
  end

  # Reset encoder to initial state (matches XZ Utils rc_reset).
  #
  # Clears the interval, the byte cache, the output byte counter and
  # the symbol queue.
  #
  # @return [void]
  def reset
    @low = 0
    @cache_size = 1 # XZ starts with 1, not 0
    @range = 0xFFFFFFFF
    @cache = 0
    @out_total = 0
    @count = 0
    @pos = 0
    @symbols = Array.new(RC_SYMBOLS_MAX, 0)
    @probs = Array.new(RC_SYMBOLS_MAX, nil)
  end

  # Forget pending symbols (matches XZ Utils rc_forget).
  #
  # @raise [RuntimeError] if #encode stopped part-way through the queue
  # @return [void]
  def forget
    raise "Cannot forget while encoding" if @pos != 0

    @count = 0
  end

  # Queue a bit for encoding (matches XZ Utils rc_bit).
  #
  # @param prob [Probability] Probability model; #encode reads and
  #   writes its +value+
  # @param bit [Integer] Bit value (0 or 1)
  # @raise [RuntimeError] if the symbol queue is full
  # @return [Integer] Number of queued symbols
  def bit(prob, bit)
    push_symbol(bit, prob)
  end

  # Queue bittree encoding, most significant bit first (matches
  # XZ Utils rc_bittree).
  #
  # @param probs [Array<Probability>] Probability array, indexed by
  #   tree node starting at 1
  # @param bit_count [Integer] Number of bits
  # @param symbol [Integer] Symbol to encode
  def bittree(probs, bit_count, symbol)
    model_index = 1

    bit_count.times do |i|
      bit_value = (symbol >> (bit_count - 1 - i)) & 1
      bit(probs[model_index], bit_value)
      model_index = (model_index << 1) | bit_value
    end
  end

  # Queue bittree encoding, least significant bit first (matches
  # XZ Utils rc_bittree_reverse).
  #
  # @param probs [Array<Probability>] Probability array, indexed by
  #   tree node starting at 1
  # @param bit_count [Integer] Number of bits
  # @param symbol [Integer] Symbol to encode
  def bittree_reverse(probs, bit_count, symbol)
    model_index = 1

    bit_count.times do
      bit_value = symbol & 1
      symbol >>= 1
      bit(probs[model_index], bit_value)
      model_index = (model_index << 1) | bit_value
    end
  end

  # Queue direct (fixed 0.5 probability) bits, most significant bit
  # first (matches XZ Utils rc_direct).
  #
  # @param value [Integer] Value to encode
  # @param bit_count [Integer] Number of bits
  # @raise [RuntimeError] if the symbol queue is full
  def direct(value, bit_count)
    bit_count.times do |i|
      push_symbol(RC_DIRECT_0 | ((value >> (bit_count - 1 - i)) & 1), nil)
    end
  end

  # Queue the five RC_FLUSH symbols that finish a stream (matches
  # XZ Utils rc_flush).
  #
  # @raise [RuntimeError] if the symbol queue is full
  # @return [void]
  def flush
    debug_trace { "[FLUSH] Adding 5 RC_FLUSH symbols, @count before=#{@count}" }
    5.times { push_symbol(RC_FLUSH, nil) }
    debug_trace { "[FLUSH] @count after=#{@count}" }
  end

  # Get number of pending bytes (matches XZ Utils rc_pending).
  #
  # @return [Integer] Number of pending output bytes
  def pending
    @cache_size + 5 - 1
  end

  # Check if no symbols are queued.
  #
  # @return [Boolean] True if no symbols queued
  def none?
    @count.zero?
  end

  # Encode all queued symbols to output (matches XZ Utils rc_encode).
  #
  # When the output buffer fills up the method returns +true+ and
  # keeps its position in the queue, so a later call continues with
  # the same symbol. Once an RC_FLUSH symbol has been fully written
  # the encoder is reset.
  #
  # @param out [IO,String] Output buffer
  # @param out_pos [IntRef] Current output position (advanced in place)
  # @param out_size [Integer] Output buffer size
  # @raise [RuntimeError] on an overfull queue or an unknown symbol
  # @return [Boolean] True if output buffer filled before encoding complete
  def encode(out, out_pos, out_size)
    raise "Symbol buffer overflow" if @count > RC_SYMBOLS_MAX

    debug_trace { "[ENCODE] Start: @count=#{@count} @pos=#{@pos} @out_total=#{@out_total}" }

    while @pos < @count
      # Normalize (same order as XZ Utils: shift first, then rescale)
      if @range < TOP
        return true if shift_low(out, out_pos, out_size)

        @range <<= SHIFT_BITS
      end

      symbol = @symbols[@pos]
      case symbol
      when RC_BIT_0
        prob = @probs[@pos]
        # XZ Utils: rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * prob
        @range = (@range >> BIT_MODEL_TOTAL_BITS) * prob.value
        # XZ Utils: prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS
        prob.value += (BIT_MODEL_TOTAL - prob.value) >> MOVE_BITS

      when RC_BIT_1
        prob = @probs[@pos]
        # XZ Utils: bound = prob * (rc->range >> RC_BIT_MODEL_TOTAL_BITS)
        bound = prob.value * (@range >> BIT_MODEL_TOTAL_BITS)
        @low += bound
        @range -= bound
        # XZ Utils: prob -= prob >> RC_MOVE_BITS
        prob.value -= prob.value >> MOVE_BITS

      when RC_DIRECT_0
        @range >>= 1

      when RC_DIRECT_1
        @range >>= 1
        @low += @range

      when RC_FLUSH
        return true if write_flush_bytes(out, out_pos, out_size)

        # The encoder has been reset; nothing is left in the queue.
        break

      else
        raise "Unknown symbol type: #{symbol}"
      end

      @pos += 1
    end

    debug_trace { "[ENCODE] End: @count=#{@count} @pos=#{@pos} @out_total=#{@out_total}" }

    @count = 0
    @pos = 0

    false
  end

  # Shift low bytes to output (matches XZ Utils rc_shift_low).
  #
  # @param out [IO,String] Output buffer; a String is written with
  #   +setbyte+ at +out_pos+, anything else with +putc+
  # @param out_pos [IntRef] Current output position (advanced in place)
  # @param out_size [Integer] Output buffer size
  # @return [Boolean] True if output buffer filled
  def shift_low(out, out_pos, out_size)
    carry = @low >> 32

    # XZ Utils: if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000) || (uint32_t)(rc->low >> 32) != 0)
    if (@low & 0xFFFFFFFF) < 0xFF000000 || carry != 0
      # XZ Utils: do { ... } while (--rc->cache_size != 0);
      while @cache_size.positive?
        # State is left untouched here so the next call resumes with
        # the same pending byte.
        return true if out_pos.value == out_size

        # XZ Utils: out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32)
        output_byte = (@cache + (carry & 0xFF)) & 0xFF

        if out.is_a?(String)
          out.setbyte(out_pos.value, output_byte)
        else
          out.putc(output_byte)
        end

        out_pos.value += 1
        @out_total += 1
        @cache = 0xFF

        @cache_size -= 1
      end

      # XZ Utils: rc->cache = (rc->low >> 24) & 0xFF
      @cache = (@low >> 24) & 0xFF
    end

    # XZ Utils: ++rc->cache_size; rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS
    @cache_size += 1
    @low = (@low & 0x00FFFFFF) << SHIFT_BITS

    false
  end

  # Number of bytes written since the last reset.
  #
  # @return [Integer] Value of the output byte counter
  def bytes_for_decode
    @out_total
  end

  # Adapter names kept for the existing queue-style API.
  alias queue_bit bit
  alias queue_bittree bittree
  alias queue_bittree_reverse bittree_reverse
  alias queue_direct_bits direct
  alias queue_flush flush
  alias encode_symbols encode

  private

  # Append one symbol (and its probability model, if any) to the queue.
  #
  # @return [Integer] Number of queued symbols
  def push_symbol(symbol, prob)
    raise "Symbol buffer overflow" if @count >= RC_SYMBOLS_MAX

    @symbols[@count] = symbol
    @probs[@count] = prob
    @count += 1
  end

  # Write the bytes for the remaining queued symbols, starting at an
  # RC_FLUSH symbol, then reset the encoder.
  #
  # @return [Boolean] True if the output buffer filled up before the
  #   flush completed (position is kept for resuming)
  def write_flush_bytes(out, out_pos, out_size)
    # Prevent further normalizations (XZ Utils behavior)
    @range = 0xFFFFFFFF

    debug_trace { "[ENCODE] RC_FLUSH: @pos=#{@pos} @count=#{@count}" }

    iteration = 0
    # One shift per remaining symbol, always at least once.
    loop do
      iteration += 1
      debug_trace { "[ENCODE] RC_FLUSH iteration #{iteration}: @pos=#{@pos}" }
      return true if shift_low(out, out_pos, out_size)

      debug_trace { "[ENCODE] After shift_low: @pos=#{@pos}" }
      @pos += 1
      break if @pos >= @count
    end

    debug_trace { "[ENCODE] After RC_FLUSH loop: total #{iteration} iterations" }

    # Reset the range encoder (matches XZ Utils)
    reset
    false
  end

  # Emit a debug line on stderr when the DEBUG environment variable is
  # set. The message is built lazily by the block.
  #
  # @return [nil]
  def debug_trace
    warn yield if ENV["DEBUG"]
  end
end
320
+
321
# Mutable box around a single value.
#
# Lets a callee advance a caller's counter in place, such as the
# +out_pos+ argument of the range encoder.
class IntRef
  # @param val [Integer] Initial value to hold
  def initialize(val)
    @value = val
  end

  # @return [Integer] The value currently held
  attr_reader :value

  # Replace the held value.
  attr_writer :value
end
329
+ end
330
+ end
331
+ end
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Algorithms
5
+ class LZMA < Algorithm
6
# XZ Utils-compatible LZMA state machine
#
# Tracks the kind of the most recently encoded symbols as one of 12
# numbered states. States below LIT_STATES mean the previous symbol
# was a literal.
#
# Based on: xz/src/liblzma/lzma/lzma_common.h
class XzState
  # 12 LZMA states (matching XZ Utils exactly)
  STATE_LIT_LIT = 0 # literal after literal
  STATE_MATCH_LIT_LIT = 1 # literal after literal after match
  STATE_REP_LIT_LIT = 2 # literal after literal after rep
  STATE_SHORTREP_LIT_LIT = 3 # literal after literal after shortrep
  STATE_MATCH_LIT = 4 # literal after match
  STATE_REP_LIT = 5 # literal after rep
  STATE_SHORTREP_LIT = 6 # literal after shortrep
  STATE_LIT_MATCH = 7 # match after literal
  STATE_LIT_LONGREP = 8 # longrep after literal
  STATE_LIT_SHORTREP = 9 # shortrep after literal
  STATE_NONLIT_MATCH = 10 # match after non-literal
  STATE_NONLIT_REP = 11 # rep after non-literal

  LIT_STATES = 7 # States 0-6 indicate previous was literal

  # State names for debugging
  STATE_NAMES = {
    STATE_LIT_LIT => "STATE_LIT_LIT",
    STATE_MATCH_LIT_LIT => "STATE_MATCH_LIT_LIT",
    STATE_REP_LIT_LIT => "STATE_REP_LIT_LIT",
    STATE_SHORTREP_LIT_LIT => "STATE_SHORTREP_LIT_LIT",
    STATE_MATCH_LIT => "STATE_MATCH_LIT",
    STATE_REP_LIT => "STATE_REP_LIT",
    STATE_SHORTREP_LIT => "STATE_SHORTREP_LIT",
    STATE_LIT_MATCH => "STATE_LIT_MATCH",
    STATE_LIT_LONGREP => "STATE_LIT_LONGREP",
    STATE_LIT_SHORTREP => "STATE_LIT_SHORTREP",
    STATE_NONLIT_MATCH => "STATE_NONLIT_MATCH",
    STATE_NONLIT_REP => "STATE_NONLIT_REP",
  }.freeze

  # @return [Integer] Current state number
  attr_accessor :value

  # @param initial [Integer] Starting state number
  def initialize(initial = STATE_LIT_LIT)
    @value = initial
  end

  # Update state after encoding literal
  # Matches XZ Utils update_literal() macro
  #
  # @return [nil]
  def update_literal
    previous = @value
    @value = if previous <= STATE_SHORTREP_LIT_LIT
               STATE_LIT_LIT
             elsif previous <= STATE_LIT_SHORTREP
               previous - 3
             else
               previous - 6
             end
    trace_transition("update_literal", previous)
  end

  # Update state after encoding match
  # Matches XZ Utils update_match() macro
  #
  # @return [nil]
  def update_match
    previous = @value
    @value = literal_state? ? STATE_LIT_MATCH : STATE_NONLIT_MATCH
    trace_transition("update_match", previous)
  end

  # Update state after encoding long rep match
  # Matches XZ Utils update_long_rep() macro
  #
  # @return [Integer] The new state number
  def update_long_rep
    @value = literal_state? ? STATE_LIT_LONGREP : STATE_NONLIT_REP
  end

  # Update state after encoding short rep (1 byte)
  # Matches XZ Utils update_short_rep() macro
  #
  # @return [Integer] The new state number
  def update_short_rep
    @value = literal_state? ? STATE_LIT_SHORTREP : STATE_NONLIT_REP
  end

  # Check if previous symbol was literal
  #
  # @return [Boolean]
  def literal_state?
    @value < LIT_STATES
  end

  # Create an independent copy of this state.
  #
  # Uses the receiver's own class so that subclasses copy to their
  # own type.
  #
  # @return [XzState]
  def dup
    self.class.new(@value)
  end

  # Reset to initial state
  #
  # @return [Integer] STATE_LIT_LIT
  def reset
    @value = STATE_LIT_LIT
  end

  # Check if state is valid
  #
  # @return [Boolean] True when the value is one of the 12 states
  def valid?
    @value.between?(STATE_LIT_LIT, STATE_NONLIT_REP)
  end

  # String representation for debugging
  #
  # @return [String] Constant name of the state, or "INVALID(n)"
  def to_s
    STATE_NAMES[@value] || "INVALID(#{@value})"
  end

  private

  # Report a state change on stderr when LZMA_DEBUG is set.
  #
  # @param label [String] Name of the transition being reported
  # @param previous [Integer] State number before the transition
  # @return [nil]
  def trace_transition(label, previous)
    return unless ENV["LZMA_DEBUG"]

    warn "DEBUG: #{label} - state: #{previous} → #{@value}"
  end
end
114
+ end
115
+ end
116
+ end