omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,288 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "../../../algorithms/lzma/constants"
24
+ require_relative "../../../algorithms/lzma/match_finder_config"
25
+
26
+ module Omnizip
27
+ module Implementations
28
+ module SevenZip
29
+ module LZMA
30
+ # 7-Zip LZMA SDK match finder implementation.
31
+ #
32
+ # This is the original SdkMatchFinder moved from algorithms/lzma/sdk_match_finder.rb
33
+ # to the new namespace structure.
34
+ #
35
+ # Ported from 7-Zip LZMA SDK by Igor Pavlov.
36
+ class MatchFinder
37
+ include Omnizip::Algorithms::LZMA::Constants
38
+
39
# Read-only pair describing one match found in the dictionary.
#
# +length+ is the number of matching bytes; +distance+ is how far back
# from the current position the matching data starts.
class Match
  attr_reader :length, :distance

  # @param length [Integer] Number of matching bytes
  # @param distance [Integer] Backward distance to the matching data
  def initialize(length, distance)
    @length, @distance = length, distance
  end
end
48
+
49
+ attr_reader :config
50
+
51
# Initialize the SDK-compatible match finder
#
# @param config [MatchFinderConfig] Configuration object; it must respond
#   to window_size, max_match_length, chain_length and lazy_matching
def initialize(config)
  @config = config
  @window_size = config.window_size
  @max_match_length = config.max_match_length
  @chain_length = config.chain_length
  @lazy_matching = config.lazy_matching

  # Match held back by lazy matching. Assigned here so that
  # find_longest_match never reads an unassigned instance variable when it
  # is called before the first reset.
  @lazy_match = nil

  # Not consulted by any lookup in this class; kept because reset clears it.
  @hash_table = {}

  # Hash chain: hash value => Array of positions, oldest first
  @hash_chain = {}

  # Lookup table used by the hash functions
  init_crc_table
end
72
+
73
# Find the longest match at the given position
#
# Returns nil when fewer than MATCH_LEN_MIN bytes remain. With lazy
# matching enabled, a match found at pos + 1 that is longer than the one
# found at pos is held back: this call returns nil and the held match is
# handed out by the following call.
#
# NOTE(review): update_hash is only reached on the final path below. It is
# skipped both when a match is held back and when a held match is handed
# out, so those positions never enter the hash chains - confirm that this
# is intended.
#
# @param data [String, Array<Integer>] Input data
# @param pos [Integer] Current position in data
# @return [Match, nil] Best match or nil if no match found
def find_longest_match(data, pos)
  remaining = data.size - pos
  return nil if remaining <= 0 || remaining < MATCH_LEN_MIN

  # A match deferred by the previous call takes priority.
  if @lazy_matching && @lazy_match
    held = @lazy_match
    @lazy_match = nil
    return held
  end

  current = find_best_match(data, pos)

  if @lazy_matching && current && pos + 1 < data.size
    # Peek one position ahead; a strictly longer match there wins.
    lookahead = find_best_match(data, pos + 1)
    if lookahead && lookahead.length > current.length
      @lazy_match = lookahead
      return nil
    end
  end

  # Register pos only after searching, so a position never matches itself.
  update_hash(data, pos)
  current
end
110
+
111
# Drop all accumulated search state so the finder can be reused.
#
# Empties both hash containers in place and forgets any held lazy match.
#
# @return [void]
def reset
  [@hash_table, @hash_chain].each(&:clear)
  @lazy_match = nil
end
119
+
120
+ private
121
+
122
# Find best match at position (SDK's GetMatches core logic)
#
# Walks the chain of every hash computed for +pos+, newest position
# first, and keeps the longest match of at least MATCH_LEN_MIN bytes.
# At most @chain_length candidates are examined per chain, and a chain
# walk stops at the first candidate that is not strictly before +pos+ or
# lies further back than @window_size.
#
# @param data [String, Array<Integer>] Input data
# @param pos [Integer] Current position
# @return [Match, nil] Best match or nil
def find_best_match(data, pos)
  hashes = compute_hashes(data, pos)
  return nil if hashes.empty?

  winner = nil
  longest = MATCH_LEN_MIN - 1

  hashes.each_value do |key|
    chain = @hash_chain[key]
    next if chain.nil? || chain.empty?

    chain.reverse_each.with_index do |candidate, examined|
      break if examined >= @chain_length

      gap = pos - candidate
      break if gap <= 0 || gap > @window_size

      len = calculate_match_length(data, pos, candidate)
      next unless len > longest

      longest = len
      winner = Match.new(len, gap)

      # Nothing can beat a maximum-length match.
      break if longest >= @max_match_length
    end

    # A maximum-length match also makes the remaining chains pointless.
    break if longest >= @max_match_length
  end

  winner
end
167
+
168
# Compute a single hash for the bytes starting at +pos+
#
# Prefers the 3-byte hash (table[b0] ^ b1 ^ (b2 << 8)) and falls back to
# the 2-byte hash (table[b0] ^ b1) when only two bytes remain. The result
# is masked to 16 bits.
#
# Bytes are read one at a time rather than through +data.bytes+, which
# would copy the entire input into a new Array on every call.
#
# @param data [String, Array<Integer>] Input data
# @param pos [Integer] Position to hash from
# @return [Integer, nil] Hash value, or nil when fewer than 2 bytes remain
def compute_hash(data, pos)
  remaining = data.size - pos
  return nil if remaining < 2

  byte_at = data.is_a?(String) ? data.method(:getbyte) : data.method(:[])

  hash = @crc_table[byte_at.call(pos)] ^ byte_at.call(pos + 1)
  hash ^= (byte_at.call(pos + 2) << 8) if remaining >= 3
  hash & 0xFFFF
end
197
+
198
# Compute both 2-byte and 3-byte hashes
#
# The 2-byte hash is table[b0] ^ b1; the 3-byte hash additionally mixes in
# (b2 << 8). Both are masked to 16 bits. Entries are inserted in the order
# :hash2, :hash3, which is the order callers iterate them in.
#
# Bytes are read one at a time rather than through +data.bytes+, which
# would copy the entire input into a new Array on every call.
#
# @param data [String, Array<Integer>] Input data
# @param pos [Integer] Position to hash from
# @return [Hash{Symbol => Integer}] :hash2 when at least 2 bytes remain,
#   plus :hash3 when at least 3 remain; empty otherwise
def compute_hashes(data, pos)
  hashes = {}
  remaining = data.size - pos
  return hashes if remaining < 2

  byte_at = data.is_a?(String) ? data.method(:getbyte) : data.method(:[])

  base = @crc_table[byte_at.call(pos)] ^ byte_at.call(pos + 1)
  hashes[:hash2] = base & 0xFFFF
  hashes[:hash3] = (base ^ (byte_at.call(pos + 2) << 8)) & 0xFFFF if remaining >= 3

  hashes
end
222
+
223
# Calculate match length between two positions
#
# Compares bytes starting at +pos1+ and +pos2+ until they differ, the end
# of the data is reached from +pos1+, or @max_match_length is hit.
#
# Bytes are read one at a time rather than through +data.bytes+, which
# would copy the entire input into a new Array for every candidate.
#
# @param data [String, Array<Integer>] Input data
# @param pos1 [Integer] First position
# @param pos2 [Integer] Second position
# @return [Integer] Length of match
def calculate_match_length(data, pos1, pos2)
  max_len = [data.size - pos1, @max_match_length].min
  length = 0

  if data.is_a?(String)
    while length < max_len &&
          data.getbyte(pos1 + length) == data.getbyte(pos2 + length)
      length += 1
    end
  else
    while length < max_len && data[pos1 + length] == data[pos2 + length]
      length += 1
    end
  end

  length
end
242
+
243
# Record +pos+ in the chain of every hash computed for it
#
# The position is appended to both the 2-byte and the 3-byte chain (when
# available). Each chain is capped at twice @chain_length entries by
# dropping its oldest entry; the SDK uses a cyclic buffer instead.
#
# @param data [String, Array<Integer>] Input data
# @param pos [Integer] Position to add
# @return [void]
def update_hash(data, pos)
  hashes = compute_hashes(data, pos)
  return if hashes.empty?

  limit = @chain_length * 2
  hashes.each_value do |key|
    chain = (@hash_chain[key] ||= [])
    chain.push(pos)
    chain.shift if chain.size > limit
  end
end
265
+
266
# Build the 256-entry lookup table used by the hash functions
#
# Each entry is produced by eight rounds of the reflected CRC-32 step
# (polynomial 0xEDB88320) applied to the index; only the low 8 bits of the
# result are stored.
#
# @return [void]
def init_crc_table
  @crc_table = (0...256).map do |seed|
    folded = 8.times.reduce(seed) do |crc, _round|
      crc.odd? ? (crc >> 1) ^ 0xEDB88320 : crc >> 1
    end
    folded & 0xFF
  end
end
284
+ end
285
+ end
286
+ end
287
+ end
288
+ end
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ # Ported from 7-Zip SDK C/LzmaDec.c
24
+ # Direct port of the LZMA SDK range decoder for byte-for-byte compatibility
25
+ # with 7-Zip archives.
26
+
27
+ require_relative "../../../algorithms/lzma/constants"
28
+ require_relative "../../../algorithms/lzma/bit_model"
29
+
30
+ module Omnizip
31
+ module Implementations
32
+ module SevenZip
33
+ module LZMA
34
+ # Range decoder for 7-Zip SDK LZMA decompression
35
+ #
36
+ # This is a direct port of 7-Zip SDK's range decoder implementation
37
+ # from LzmaDec.c for guaranteed compatibility with 7-Zip archives.
38
+ #
39
+ # Reference: 7-Zip SDK, C/LzmaDec.c
40
+ class RangeDecoder
41
+ include Omnizip::Algorithms::LZMA::Constants
42
+
43
+ attr_reader :code
44
+
45
# Create a decoder bound to +input_stream+ and prime it
#
# Starts with a full 32-bit range and lets init_decoder consume the
# stream header bytes that seed the code value.
#
# @param input_stream [IO] The input stream of encoded bytes
def initialize(input_stream)
  @stream = input_stream
  @range, @code = 0xFFFFFFFF, 0
  init_decoder
end
54
+
55
# Decode a single bit using a probability model
#
# Follows the 7-Zip SDK IF_BIT_0 / UPDATE_0 / UPDATE_1 macros from
# LzmaDec.c: normalize first, split the range at
# (range >> 11) * probability, then adapt the probability towards the
# decoded bit by a shift of MOVE_BITS.
#
# The model's @probability instance variable is written directly.
#
# @param model [BitModel] The probability model for this bit
# @return [Integer] The decoded bit value (0 or 1)
def decode_bit(model)
  current = model.probability
  normalize

  threshold = (@range >> 11) * current
  bit = @code < threshold ? 0 : 1

  if bit.zero?
    # UPDATE_0: keep the lower part of the range
    @range = threshold & 0xFFFFFFFF
    updated = current + ((BIT_MODEL_TOTAL - current) >> MOVE_BITS)
  else
    # UPDATE_1: keep the upper part and rebase the code
    @range = (@range - threshold) & 0xFFFFFFFF
    @code = (@code - threshold) & 0xFFFFFFFF
    updated = current - (current >> MOVE_BITS)
  end

  model.instance_variable_set(:@probability, updated)
  bit
end
91
+
92
# Decode bits directly without using probability model
#
# For every bit the range is halved; the bit is 1 when the code lies in
# the upper half (code >= range), in which case the code is rebased by
# subtracting the range. This is the comparison form of the SDK's
# branch-free direct-bit loop and mirrors decode_direct_bits_with_base.
#
# The previous implementation appended the borrow flag of (code - range),
# which is set exactly when the bit is 0, so every decoded bit came out
# inverted.
#
# @param num_bits [Integer] Number of bits to decode
# @return [Integer] The decoded value, most significant bit first
def decode_direct_bits(num_bits)
  result = 0
  num_bits.times do
    normalize
    @range = (@range >> 1) & 0xFFFFFFFF
    result <<= 1

    if @code >= @range
      @code = (@code - @range) & 0xFFFFFFFF
      result |= 1
    end
  end
  result
end
109
+
110
# Decode bits directly, appending them to a base value
#
# Used by the distance decoder for slots 14+, where decoding builds on a
# base value (2 or 3). Each step shifts the running result left by one and
# adds 1 when the code lies in the upper half of the halved range.
#
# @param num_bits [Integer] Number of bits to decode
# @param base [Integer] Base value to start from
# @return [Integer] The decoded value
def decode_direct_bits_with_base(num_bits, base)
  result = base
  num_bits.times do
    normalize
    @range = (@range >> 1) & 0xFFFFFFFF

    if @code >= @range
      # Bit is 1: rebase the code into the lower half
      @code = (@code - @range) & 0xFFFFFFFF
      result = (result << 1) + 1
    else
      result <<= 1
    end
  end
  result
end
137
+
138
# Swap the stream that subsequent reads pull bytes from
#
# Intended for LZMA2 multi-chunk streams. Range and code are left as they
# are; only the byte source changes.
#
# @param new_stream [IO] New input stream
# @return [void]
def update_stream(new_stream)
  @stream = new_stream
end
145
+
146
# Re-prime the decoder from the current stream (for LZMA2 chunks)
#
# Restores the full 32-bit range and shifts five bytes into the code.
# Because the code is masked to 32 bits, the first of those bytes is
# shifted out again; unlike init_decoder it is not checked for being 0.
#
# @return [void]
def reset
  @range = 0xFFFFFFFF
  @code = 0
  5.times do
    @code = ((@code << 8) | read_byte) & 0xFFFFFFFF
  end
end
155
+
156
+ private
157
+
158
# Consume the five header bytes of the range-coded stream
#
# The first byte must be 0; the following four bytes form the initial
# code value, most significant byte first.
#
# @raise [RuntimeError] if the first byte is not 0
# @return [void]
def init_decoder
  raise "Invalid LZMA stream: first byte not 0" unless read_byte.zero?

  @code = 0
  4.times do
    @code = ((@code << 8) | read_byte) & 0xFFFFFFFF
  end
end
170
+
171
# Keep the range at or above TOP by pulling in more input
#
# Corresponds to the 7-Zip SDK NORMALIZE macro in LzmaDec.c: while the
# range is below TOP, shift it left by 8 bits and shift one new byte into
# the code. Both values are kept within 32 bits.
#
# @return [void]
def normalize
  until @range >= TOP
    @range = (@range << 8) & 0xFFFFFFFF
    @code = ((@code << 8) | read_byte) & 0xFFFFFFFF
  end
end
183
+
184
# Fetch the next byte from the input stream
#
# @raise [Omnizip::DecompressionError] if the stream has no more bytes
# @return [Integer] The byte value (0-255)
def read_byte
  @stream.getbyte ||
    raise(Omnizip::DecompressionError,
          "LZMA compressed data exhausted prematurely")
end
196
+ end
197
+ end
198
+ end
199
+ end
200
+ end
@@ -0,0 +1,197 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ # Ported from 7-Zip SDK C/LzmaEnc.c
24
+ # Direct port of the LZMA SDK range encoder for byte-for-byte compatibility
25
+ # with 7-Zip archives.
26
+
27
+ require_relative "../../../algorithms/lzma/range_coder"
28
+ require_relative "../../../algorithms/lzma/constants"
29
+
30
+ module Omnizip
31
+ module Implementations
32
+ module SevenZip
33
+ module LZMA
34
+ # Range encoder for 7-Zip SDK LZMA compression
35
+ #
36
+ # This is a direct port of 7-Zip SDK's range encoder implementation
37
+ # from LzmaEnc.c for guaranteed compatibility with 7-Zip archives.
38
+ #
39
+ # KEY DIFFERENCE from XZ Utils:
40
+ # - 7-Zip SDK normalizes AFTER encoding each bit
41
+ # - XZ Utils normalizes BEFORE encoding each bit
42
+ #
43
+ # This difference produces different output bytes, so we need
44
+ # separate implementations for 7-Zip and XZ Utils compatibility.
45
+ #
46
+ # Reference: 7-Zip SDK, C/LzmaEnc.c (RC_NORM, RC_BIT and RangeEnc_ShiftLow)
47
+ class RangeEncoder
48
+ include Omnizip::Algorithms::LZMA::Constants
49
+
50
+ # Initialize the range encoder
51
+ #
52
+ # @param output_stream [IO] The output stream for encoded bytes
53
+ def initialize(output_stream)
54
+ @stream = output_stream
55
+ @low = 0
56
+ @range = 0xFFFFFFFF # Full 32-bit range
57
+ @cache = 0
58
+ @cache_size = 1 # SDK initializes to 1
59
+ @pre_flush_pos = 0
60
+ end
61
+
62
# Encode a single bit using a probability model
#
# Follows the 7-Zip SDK RC_BIT() macro from LzmaEnc.c: split the range at
# (range >> 11) * probability, take the lower part for a 0 bit or advance
# +low+ and take the upper part for a 1 bit, adapt the probability by a
# shift of MOVE_BITS, and only then normalize.
#
# The model's @probability instance variable is written directly.
#
# @param model [BitModel] The probability model for this bit
# @param bit [Integer] The bit value (0 or 1)
# @return [void]
def encode_bit(model, bit)
  current = model.probability
  split = (@range >> 11) * current

  if bit.zero?
    # RC_BIT_0: keep the lower portion of the range
    @range = split & 0xFFFFFFFF
    updated = current + ((BIT_MODEL_TOTAL - current) >> MOVE_BITS)
  else
    # RC_BIT_1: move low past the split and keep the upper portion
    @low = (@low + split) & 0xFFFFFFFFFFFFFFFF
    @range = (@range - split) & 0xFFFFFFFF
    updated = current - (current >> MOVE_BITS)
  end

  model.instance_variable_set(:@probability, updated)

  # RC_NORM runs after the bit has been encoded
  normalize
end
102
+
103
# Encode bits directly without using probability model
#
# Emits the low +num_bits+ bits of +value+, most significant first. For
# each bit the range is halved, +low+ is advanced by the halved range when
# the bit is 1, and the encoder is normalized.
#
# @param value [Integer] The value to encode
# @param num_bits [Integer] Number of bits to encode
# @return [void]
def encode_direct_bits(value, num_bits)
  highest = num_bits - 1
  num_bits.times do |step|
    @range = (@range >> 1) & 0xFFFFFFFF
    @low = (@low + @range) & 0xFFFFFFFFFFFFFFFF if value[highest - step] == 1
    normalize
  end
end
120
+
121
# Finish the range-coded stream
#
# Remembers the stream position as it was before flushing, restores the
# full 32-bit range, and pushes +low+ out through five shift_low calls
# (matches SDK behavior).
#
# @return [void]
def flush
  @pre_flush_pos = @stream.pos
  @range = 0xFFFFFFFF

  5.times do
    shift_low
  end
end
136
+
137
# Return bytes needed for decoding
#
# Yields the position recorded by flush when one is set; otherwise falls
# back to the current position of the output stream.
#
# @return [Integer] Number of bytes decoder will consume
def bytes_for_decode
  return @pre_flush_pos if @pre_flush_pos

  @stream.pos
end
143
+
144
# Report where the output stream currently stands
#
# Delegates to the stream's own +pos+.
#
# @return [Integer] Current position in output stream
def pos
  @stream.pos
end
150
+
151
+ private
152
+
153
# Keep the range at or above TOP by emitting bytes
#
# Corresponds to the 7-Zip SDK RC_NORM macro in LzmaEnc.c: while the range
# is below TOP, shift it left by 8 bits (kept within 32 bits) and let
# shift_low move the top byte of +low+ towards the output.
#
# @return [void]
def normalize
  until @range >= TOP
    @range = (@range << 8) & 0xFFFFFFFF
    shift_low
  end
end
166
+
167
# Shift the top byte of 'low' towards the output
#
# Ported from 7-Zip SDK RangeEnc_ShiftLow() (C/LzmaEnc.c). A byte can only
# be written once it is known whether a carry will reach it:
#
# - If the low 32 bits are below 0xFF000000, or a carry is present in the
#   bits above 32, the cached byte plus carry is written, followed by one
#   (0xFF + carry) byte for every further pending position, and the top
#   byte of the low 32 bits becomes the new cached byte.
# - Otherwise nothing is written; the pending count simply grows.
#
# In both cases +low+ is then reduced to its lower 24 bits shifted left
# by 8.
#
# @return [void]
def shift_low
  low_word = @low & 0xFFFFFFFF
  carry = (@low >> 32) & 0xFF

  unless low_word >= 0xFF000000 && carry.zero?
    begin
      @stream.putc((@cache + carry) & 0xFF)
      @cache = 0xFF
      @cache_size -= 1
    end until @cache_size.zero?

    @cache = (low_word >> 24) & 0xFF
  end

  @cache_size += 1
  @low = (@low & 0x00FFFFFF) << 8
end
193
+ end
194
+ end
195
+ end
196
+ end
197
+ end