omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,192 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "xz_utils_decoder"
24
+ require "stringio"
25
+
26
+ module Omnizip
27
+ module Algorithms
28
+ class LZMA < Algorithm
29
+ #
30
+ # Decoder for .lzma (LZMA_Alone) format
31
+ #
32
+ # This is the legacy LZMA_Alone format used by LZMA Utils 4.32.x.
33
+ # It is DIFFERENT from the XZ format's LZMA2 compression!
34
+ #
35
+ # File format:
36
+ # - Properties (1 byte): encodes lc, lp, pb values
37
+ # - Dictionary size (4 bytes, little-endian)
38
+ # - Uncompressed size (8 bytes, little-endian, UINT64_MAX = unknown)
39
+ # - LZMA1 compressed stream (no footer, no CRC32)
40
+ #
41
+ # Reference: /Users/mulgogi/src/external/xz/src/liblzma/common/alone_decoder.c
42
+ #
43
+ # This decoder uses the same LZMA1 decoding engine as XZ format,
44
+ # but with the legacy .lzma container format.
45
+ #
46
+ # @example Decode .lzma file
47
+ # data = File.binread("file.lzma")
48
+ # decoder = Omnizip::Algorithms::LZMA::LzmaAloneDecoder.new(StringIO.new(data))
49
+ # result = decoder.decode_stream
50
+ #
51
+ class LzmaAloneDecoder
52
+ # Maximum valid uncompressed size (256 GiB)
53
+ # From alone_decoder.c:118
54
+ MAX_UNCOMPRESSED_SIZE = (1 << 38)
55
+
56
+ # Property byte validation limits
57
+ # From lzma_decoder.c:1218
58
+ MAX_PROPERTY_BYTE = (((4 * 5) + 4) * 9) + 8 # = 233
59
+
60
+ # Initialize the decoder with .lzma format input
61
+ #
62
+ # @param input [IO] Input stream of .lzma compressed data
63
+ # @param options [Hash] Decoding options
64
+ # @option options [Boolean] :picky If true, reject files unlikely to be .lzma (default: false)
65
+ # @raise [RuntimeError] If header is invalid or unsupported
66
+ def initialize(input, options = {})
67
+ @input = input
68
+ @picky = options.fetch(:picky, false)
69
+
70
+ # Parse .lzma header
71
+ parse_header
72
+
73
+ # Create a wrapper stream that starts after the header
74
+ # The XzUtilsDecoder will read from this stream
75
+ @lzma_stream = @input
76
+
77
+ # Initialize the XZ Utils LZMA decoder with parsed parameters
78
+ # validate_size=true because .lzma format has explicit uncompressed size
79
+ # allow_eopm=true because .lzma format allows EOPM even with known size
80
+ # Reference: alone_decoder.c:127 (LZMA_LZMA1EXT_ALLOW_EOPM)
81
+ @decoder = XzUtilsDecoder.new(@lzma_stream,
82
+ lzma2_mode: true,
83
+ validate_size: true,
84
+ allow_eopm: true,
85
+ lc: @lc,
86
+ lp: @lp,
87
+ pb: @pb,
88
+ dict_size: @dict_size,
89
+ uncompressed_size: @uncompressed_size)
90
+ end
91
+
92
+ # Decode the .lzma stream
93
+ #
94
+ # @param output [IO, nil] Optional output stream
95
+ # @return [String, Integer] Decompressed data or bytes written
96
+ def decode_stream(output = nil)
97
+ # .lzma format allows EOPM even when uncompressed size is known
98
+ # Reference: alone_decoder.c:127 (LZMA_LZMA1EXT_ALLOW_EOPM)
99
+ @decoder.decode_stream(output, check_rc_finished: false)
100
+ end
101
+
102
+ private
103
+
104
+ # Parse .lzma format header
105
+ #
106
+ # Format (from alone_decoder.c):
107
+ # - Properties (1 byte): lc/lp/pb encoded
108
+ # - Dictionary size (4 bytes, little-endian)
109
+ # - Uncompressed size (8 bytes, little-endian, UINT64_MAX = unknown)
110
+ #
111
+ # Reference: /Users/mulgogi/src/external/xz/src/liblzma/common/alone_decoder.c
112
+ #
113
+ # @return [void]
114
+ # @raise [RuntimeError] If header is invalid
115
+ def parse_header
116
+ # Step 1: Parse properties byte (SEQ_PROPERTIES)
117
+ # Reference: alone_decoder.c:64-68
118
+ props = @input.getbyte
119
+ raise "Invalid .lzma header: missing properties byte" if props.nil?
120
+
121
+ # Use XZ Utils property byte parsing
122
+ # Reference: /Users/mulgogi/src/external/xz/src/liblzma/lzma/lzma_decoder.c:1216-1228
123
+ if props > MAX_PROPERTY_BYTE
124
+ raise "Invalid .lzma header: properties byte #{props} exceeds maximum #{MAX_PROPERTY_BYTE}"
125
+ end
126
+
127
+ # Parse lc, lp, pb from properties byte
128
+ # Formula: pb = props / (9 * 5); lp = (props % 45) / 9; lc = (props % 45) % 9
129
+ @pb = props / (9 * 5)
130
+ remainder = props - (@pb * 9 * 5)
131
+ @lp = remainder / 9
132
+ @lc = remainder - (@lp * 9)
133
+
134
+ # Validate lc + lp <= 4 (LZMA_LCLP_MAX)
135
+ # Reference: lzma_decoder.c:1227
136
+ if @lc + @lp > 4
137
+ raise "Invalid .lzma header: lc (#{@lc}) + lp (#{@lp}) exceeds maximum 4"
138
+ end
139
+
140
+ # Step 2: Parse dictionary size (SEQ_DICTIONARY_SIZE)
141
+ # Reference: alone_decoder.c:71-96
142
+ @dict_size = 0
143
+ 4.times do |i|
144
+ byte = @input.getbyte
145
+ raise "Incomplete .lzma header: missing dictionary size byte" if byte.nil?
146
+
147
+ @dict_size |= (byte << (i * 8))
148
+ end
149
+
150
+ # Picky mode validation: only accept dictionary sizes that are
151
+ # 2^n or 2^n + 2^(n-1). This reduces false positives.
152
+ # Reference: alone_decoder.c:76-93
153
+ if @picky && @dict_size != 0xFFFFFFFF
154
+ # Check if dict_size is valid: 2^n or 2^n + 2^(n-1)
155
+ d = @dict_size - 1
156
+ d |= d >> 2
157
+ d |= d >> 3
158
+ d |= d >> 4
159
+ d |= d >> 8
160
+ d |= d >> 16
161
+ d += 1
162
+
163
+ if d != @dict_size
164
+ raise "Invalid .lzma header: dictionary size #{@dict_size} is not 2^n or 2^n + 2^(n-1)"
165
+ end
166
+ end
167
+
168
+ # Step 3: Parse uncompressed size (SEQ_UNCOMPRESSED_SIZE)
169
+ # Reference: alone_decoder.c:102-120
170
+ @uncompressed_size = 0
171
+ 8.times do |i|
172
+ byte = @input.getbyte
173
+ raise "Incomplete .lzma header: missing uncompressed size byte" if byte.nil?
174
+
175
+ @uncompressed_size |= (byte << (i * 8))
176
+ end
177
+
178
+ # Picky mode validation: if uncompressed size is known (not UINT64_MAX),
179
+ # it must be less than 256 GiB
180
+ # Reference: alone_decoder.c:116-120
181
+ if @picky && @uncompressed_size != 0xFFFFFFFFFFFFFFFF &&
182
+ @uncompressed_size >= MAX_UNCOMPRESSED_SIZE
183
+ raise "Invalid .lzma header: uncompressed size #{@uncompressed_size} exceeds maximum #{MAX_UNCOMPRESSED_SIZE}"
184
+ end
185
+
186
+ # Note: XZ Utils uses UINT64_MAX (0xFFFFFFFFFFFFFFFF) for unknown size
187
+ # Our decoder treats this as "allow end-of-payload marker"
188
+ end
189
+ end
190
+ end
191
+ end
192
+ end
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Algorithms
5
+ class LZMA < Algorithm
6
+ # LZMA State Machine
7
+ # Ported from XZ Utils lzma_common.h and lzma_decoder.c
8
+ class LZMAState
9
+ # State transition table (from lzma_decoder.c)
10
+ TRANSITIONS = {
11
+ # Literal after literal (matches XZ Utils update_literal macro)
12
+ update_literal: {
13
+ 0 => 0, 1 => 0, 2 => 0, 3 => 0, 4 => 1, 5 => 2,
14
+ 6 => 3, 7 => 4, 8 => 5, 9 => 6, 10 => 4, 11 => 5
15
+ }.freeze,
16
+
17
+ # Matched literal (literal after match, matches XZ Utils update_literal_matched macro)
18
+ # Only called when previous state was NOT a literal (states 7-11)
19
+ update_literal_matched: {
20
+ 0 => 0, 1 => 0, 2 => 0, 3 => 0, 4 => 1, 5 => 2,
21
+ 6 => 3, 7 => 4, 8 => 5, 9 => 6, 10 => 4, 11 => 5
22
+ }.freeze,
23
+
24
+ # Regular match
25
+ update_match: {
26
+ 0 => 7, 1 => 7, 2 => 7, 3 => 7, 4 => 7, 5 => 7,
27
+ 6 => 7, 7 => 10, 8 => 10, 9 => 10, 10 => 10, 11 => 10
28
+ }.freeze,
29
+
30
+ # Repeat match
31
+ update_rep: {
32
+ 0 => 8, 1 => 8, 2 => 8, 3 => 8, 4 => 8, 5 => 8,
33
+ 6 => 8, 7 => 11, 8 => 11, 9 => 11, 10 => 11, 11 => 11
34
+ }.freeze,
35
+
36
+ # Short repeat (length=1)
37
+ update_short_rep: {
38
+ 0 => 9, 1 => 9, 2 => 9, 3 => 9, 4 => 9, 5 => 9,
39
+ 6 => 9, 7 => 11, 8 => 11, 9 => 11, 10 => 11, 11 => 11
40
+ }.freeze,
41
+
42
+ # Long repeat (length>1)
43
+ # Ported from XZ Utils: state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP
44
+ # where LIT_STATES=7, STATE_LIT_LONGREP=8, STATE_NONLIT_REP=11
45
+ update_long_rep: {
46
+ 0 => 8, 1 => 8, 2 => 8, 3 => 8, 4 => 8, 5 => 8,
47
+ 6 => 8, 7 => 11, 8 => 11, 9 => 11, 10 => 11, 11 => 11
48
+ }.freeze,
49
+ }.freeze
50
+
51
+ attr_reader :value, :reps
52
+
53
+ def initialize(value = 0)
54
+ @value = value
55
+ @reps = [0, 0, 0, 0] # Initial repeat distances (matches XZ Utils)
56
+ end
57
+
58
+ # After encoding a literal
59
+ def update_literal!
60
+ @value = if use_matched_literal?
61
+ TRANSITIONS[:update_literal_matched][@value]
62
+ else
63
+ TRANSITIONS[:update_literal][@value]
64
+ end
65
+ end
66
+
67
+ # After encoding a regular match
68
+ def update_match!(distance)
69
+ @value = TRANSITIONS[:update_match][@value]
70
+ rotate_reps!(distance)
71
+ end
72
+
73
+ # After encoding a repeat match
74
+ def update_rep!(rep_index)
75
+ @value = TRANSITIONS[:update_rep][@value]
76
+ rotate_reps_for_rep!(rep_index)
77
+ end
78
+
79
+ # After encoding a short rep (length=1)
80
+ def update_short_rep!
81
+ @value = TRANSITIONS[:update_short_rep][@value]
82
+ end
83
+
84
+ # After encoding a long rep (length>1)
85
+ # Ported from XZ Utils update_long_rep macro
86
+ def update_long_rep!
87
+ @value = TRANSITIONS[:update_long_rep][@value]
88
+ end
89
+
90
+ # Check if we should use matched literal encoding
91
+ # XZ Utils logic: is_literal_state(state) = (state < LIT_STATES)
92
+ # where LIT_STATES = 7
93
+ # States 0-6: literal states (use unmatched literal)
94
+ # States 7-11: non-literal states (use matched literal)
95
+ def use_matched_literal?
96
+ @value >= 7
97
+ end
98
+
99
+ # Repeat distance rotation
100
+ def rotate_reps!(distance)
101
+ @reps[3] = @reps[2]
102
+ @reps[2] = @reps[1]
103
+ @reps[1] = @reps[0]
104
+ @reps[0] = distance
105
+ end
106
+
107
+ private
108
+
109
+ def rotate_reps_for_rep!(rep_index)
110
+ case rep_index
111
+ when 0
112
+ # Keep rep0, no rotation
113
+ when 1
114
+ # rep1 -> rep0
115
+ @reps[0], @reps[1] = @reps[1], @reps[0]
116
+ when 2
117
+ # rep2 -> rep0, rep0 -> rep1, rep1 -> rep2
118
+ @reps[0], @reps[1], @reps[2] = @reps[2], @reps[0], @reps[1]
119
+ when 3
120
+ # rep3 -> rep0, rotate others
121
+ @reps[0], @reps[1], @reps[2], @reps[3] =
122
+ @reps[3], @reps[0], @reps[1], @reps[2]
123
+ end
124
+ end
125
+ end
126
+ end
127
+ end
128
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Algorithms
5
+ class LZMA < Algorithm
6
+ # Match candidate result from LZ77 match finding
7
+ class Match
8
+ attr_reader :distance, :length
9
+
10
+ def initialize(distance, length)
11
+ @distance = distance
12
+ @length = length
13
+ end
14
+
15
+ # Check if match is valid for given dictionary size
16
+ #
17
+ # @param dict_size [Integer] Dictionary size in bytes
18
+ # @return [Boolean] true if match is valid
19
+ def valid?(dict_size)
20
+ @distance <= dict_size && @length >= 2
21
+ end
22
+
23
+ # String representation for debugging
24
+ #
25
+ # @return [String] Match description
26
+ def to_s
27
+ "Match(dist=#{@distance}, len=#{@length})"
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,205 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "match"
24
+
25
+ module Omnizip
26
+ module Algorithms
27
+ class LZMA < Algorithm
28
+ # Match Finder using hash chain algorithm for LZ77 compression
29
+ # Ported from XZ Utils lz_encoder.c
30
+ class MatchFinder
31
+ HASH_SIZE = 4096
32
+ MAX_MATCHES = 274
33
+
34
+ attr_reader :dictionary, :buffer, :position
35
+
36
+ def initialize(dictionary)
37
+ @dictionary = dictionary
38
+ @buffer = String.new(encoding: Encoding::BINARY)
39
+ @position = 0
40
+ # Use nil as empty marker (not 0) to distinguish from position 0
41
+ @hash_table = Array.new(HASH_SIZE, nil)
42
+ @hash_chain = Array.new(0)
43
+ @matches = Array.new(MAX_MATCHES)
44
+ @matches_count = 0
45
+ end
46
+
47
+ # Add input data for processing
48
+ #
49
+ # @param data [String] Binary data to add
50
+ # @return [void]
51
+ def feed(data)
52
+ @buffer << data
53
+ end
54
+
55
+ # Reset the match finder state for a new encoding session
56
+ # Clears the buffer, hash table, and hash chain
57
+ def reset
58
+ @buffer.clear
59
+ @position = 0
60
+ @hash_table = Array.new(HASH_SIZE, nil)
61
+ @hash_chain.clear
62
+ @matches_count = 0
63
+ end
64
+
65
+ # Initialize hash table for all positions up to end_pos
66
+ # This is called before encoding starts to ensure the hash table
67
+ # is populated for all positions. Matches XZ Utils "skip" behavior.
68
+ #
69
+ # @param end_pos [Integer] Last position to initialize (inclusive)
70
+ # @return [void]
71
+ def skip(end_pos)
72
+ pos = 0
73
+ while pos + 3 <= @buffer.bytesize && pos <= end_pos
74
+ hash = calc_hash(@buffer, pos)
75
+ if hash
76
+ @hash_chain[pos] = @hash_table[hash]
77
+ @hash_table[hash] = pos
78
+ end
79
+ pos += 1
80
+ end
81
+ end
82
+
83
+ # Find matches for current position
84
+ #
85
+ # @param current_pos [Integer] Position to find matches at (defaults to end)
86
+ # @return [Array<Match>] Array of matches sorted by length (descending)
87
+ def find_matches(current_pos = @buffer.bytesize - 273)
88
+ # Calculate hash for current position
89
+ hash = nil
90
+ if current_pos >= 0 && current_pos + 3 <= @buffer.bytesize
91
+ hash = calc_hash(@buffer, current_pos)
92
+ end
93
+
94
+ # Update hash table for current position (even for early positions)
95
+ # This ensures positions 0-3 are available for later matches
96
+ # XZ Utils calls this "skip" - update hash without finding matches
97
+ # CRITICAL: Only update if this position hasn't been processed yet
98
+ # (i.e., @hash_table[hash] != current_pos)
99
+ # This prevents overwriting the hash chain when find_matches is called
100
+ # after skip() has already initialized the hash table
101
+ if hash && @hash_table[hash] != current_pos
102
+ @hash_chain[current_pos] = @hash_table[hash]
103
+ @hash_table[hash] = current_pos
104
+ end
105
+
106
+ # Can't find matches if no hash or insufficient data
107
+ # Note: We CAN find matches at early positions (e.g., position 2 can match position 0)
108
+ # The only requirement is that there's enough data for hash calculation (current_pos + 3 <= buffer size)
109
+ # and that there's at least 2 bytes of history (for MIN_MATCH_LENGTH=2)
110
+ # CRITICAL: Don't produce matches until position >= 2 to ensure decoder has enough dict_full
111
+ # The decoder validates: dict_full > distance, where dict_full starts at 0 after 1st byte
112
+ # For distance=1 match to be valid, decoder needs dict_full >= 2 (at least 2 bytes decoded)
113
+ # This happens after processing position 1 (first byte was literal at position 0)
114
+ # So we can only produce matches starting at position 2
115
+ return [] if hash.nil? || @buffer.bytesize < 4 || current_pos + 3 > @buffer.bytesize || current_pos < 2
116
+
117
+ @matches_count = 0
118
+ chain_pos = @hash_chain[current_pos]
119
+
120
+ while chain_pos && @matches_count < MAX_MATCHES
121
+ # CRITICAL: Skip invalid chain_pos values (beyond buffer or negative)
122
+ next if chain_pos >= @buffer.bytesize || chain_pos.negative?
123
+
124
+ distance = current_pos - chain_pos
125
+ # CRITICAL: Break if distance is negative (chain_pos > current_pos)
126
+ # This can happen when skip() links positions within the same chunk
127
+ # where a later position has the same hash as an earlier position
128
+ break if distance.negative? || distance > @dictionary.size || distance.zero?
129
+
130
+ length = verify_match(current_pos, chain_pos)
131
+
132
+ if length >= 2
133
+ @matches[@matches_count] = Match.new(distance, length)
134
+ @matches_count += 1
135
+ end
136
+
137
+ # Safely get next chain position
138
+ chain_pos = if chain_pos < @hash_chain.size
139
+ @hash_chain[chain_pos]
140
+ end
141
+ end
142
+
143
+ @matches.first(@matches_count).sort_by { |m| -m.length }
144
+ end
145
+
146
+ # Get the longest match at current position
147
+ #
148
+ # @return [Match, nil] Longest match found or nil
149
+ def longest_match
150
+ find_matches.first
151
+ end
152
+
153
+ # Legacy API: Find longest match at given position in external byte array
154
+ # This is a compatibility method for older code that passes bytes and position
155
+ #
156
+ # @param bytes [Array<Integer>] Byte array
157
+ # @param pos [Integer] Position to find match at
158
+ # @return [Match, nil] Longest match found or nil
159
+ def find_longest_match(bytes, pos)
160
+ # If position is beyond current buffer, feed more data
161
+ if pos >= @buffer.bytesize
162
+ bytes_to_feed = bytes[pos..]
163
+ @buffer << bytes_to_feed.pack("C*")
164
+ end
165
+
166
+ # Find matches at the given position
167
+ matches = find_matches(pos)
168
+ matches.first
169
+ end
170
+
171
+ private
172
+
173
+ # Calculate hash for position (first 3 bytes)
174
+ #
175
+ # @param data [String] Buffer data
176
+ # @param pos [Integer] Position to hash
177
+ # @return [Integer] Hash value
178
+ def calc_hash(data, pos)
179
+ return 0 if pos + 3 > data.bytesize
180
+
181
+ (data.getbyte(pos) |
182
+ (data.getbyte(pos + 1) << 8) |
183
+ (data.getbyte(pos + 2) << 16)) % HASH_SIZE
184
+ end
185
+
186
+ # Verify match length between two positions
187
+ #
188
+ # @param pos1 [Integer] First position
189
+ # @param pos2 [Integer] Second position
190
+ # @return [Integer] Match length
191
+ def verify_match(pos1, pos2)
192
+ max_len = [273, @buffer.bytesize - pos1, @buffer.bytesize - pos2].min
193
+ length = 0
194
+
195
+ while length < max_len &&
196
+ @buffer.getbyte(pos1 + length) == @buffer.getbyte(pos2 + length)
197
+ length += 1
198
+ end
199
+
200
+ length
201
+ end
202
+ end
203
+ end
204
+ end
205
+ end
@@ -0,0 +1,142 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ module Omnizip
24
+ module Algorithms
25
+ class LZMA < Algorithm
26
+ # Configuration model for Match Finder behavior
27
+ #
28
+ # This model separates configuration from implementation, allowing
29
+ # different match finding strategies (SDK-compatible vs simplified)
30
+ # to be configured declaratively.
31
+ #
32
+ # @example SDK-compatible configuration
33
+ # config = MatchFinderConfig.new(
34
+ # mode: :sdk,
35
+ # hash_size: 65536,
36
+ # chain_length: 32,
37
+ # lazy_matching: false
38
+ # )
39
+ #
40
+ # @example Simplified configuration
41
+ # config = MatchFinderConfig.new(
42
+ # mode: :simplified,
43
+ # hash_size: 65536,
44
+ # chain_length: 1024
45
+ # )
46
+ class MatchFinderConfig
47
+ attr_accessor :mode, :hash_size, :chain_length, :search_mode,
48
+ :lazy_matching, :max_match_length, :window_size
49
+
50
+ def initialize(mode: "simplified", hash_size: 65_536,
51
+ chain_length: 1024, search_mode: "hash_chain",
52
+ lazy_matching: false, max_match_length: 273,
53
+ window_size: 65_536)
54
+ @mode = mode
55
+ @hash_size = hash_size
56
+ @chain_length = chain_length
57
+ @search_mode = search_mode
58
+ @lazy_matching = lazy_matching
59
+ @max_match_length = max_match_length
60
+ @window_size = window_size
61
+ end
62
+
63
+ # Validate configuration
64
+ #
65
+ # @return [Boolean] true if valid
66
+ # @raise [ArgumentError] if configuration is invalid
67
+ def validate!
68
+ unless %w[sdk simplified].include?(mode)
69
+ raise ArgumentError, "mode must be :sdk or :simplified"
70
+ end
71
+
72
+ unless %w[hash_chain binary_tree].include?(search_mode)
73
+ raise ArgumentError,
74
+ "search_mode must be :hash_chain or :binary_tree"
75
+ end
76
+
77
+ raise ArgumentError, "hash_size must be positive" if hash_size <= 0
78
+
79
+ if chain_length <= 0
80
+ raise ArgumentError,
81
+ "chain_length must be positive"
82
+ end
83
+ if max_match_length < 2
84
+ raise ArgumentError,
85
+ "max_match_length must be >= 2"
86
+ end
87
+ if window_size <= 0
88
+ raise ArgumentError,
89
+ "window_size must be positive"
90
+ end
91
+
92
+ true
93
+ end
94
+
95
+ # Create SDK-compatible configuration
96
+ #
97
+ # @param dict_size [Integer] Dictionary size
98
+ # @param level [Integer] Compression level (0-9)
99
+ # @return [MatchFinderConfig] SDK-compatible configuration
100
+ def self.sdk_config(dict_size: 65536, level: 5)
101
+ # SDK uses different parameters based on dictionary size and level
102
+ hash_size = dict_size >= (1 << 20) ? (1 << 20) : (1 << 16)
103
+
104
+ # SDK nice_len varies by compression level:
105
+ # Level 0-4: 32, Level 5-6: 64, Level 7-8: 128, Level 9: 273
106
+ chain_length = case level
107
+ when 0..4 then 32
108
+ when 5..6 then 64
109
+ when 7..8 then 128
110
+ else 273
111
+ end
112
+
113
+ new(
114
+ mode: "sdk",
115
+ hash_size: hash_size,
116
+ chain_length: chain_length,
117
+ search_mode: "hash_chain",
118
+ lazy_matching: level >= 7, # Enable lazy matching for high compression
119
+ max_match_length: 273,
120
+ window_size: dict_size,
121
+ )
122
+ end
123
+
124
+ # Create simplified configuration (backward compatible)
125
+ #
126
+ # @param dict_size [Integer] Dictionary size
127
+ # @return [MatchFinderConfig] Simplified configuration
128
+ def self.simplified_config(dict_size: 65536)
129
+ new(
130
+ mode: "simplified",
131
+ hash_size: 65536,
132
+ chain_length: 1024,
133
+ search_mode: "hash_chain",
134
+ lazy_matching: false,
135
+ max_match_length: 273,
136
+ window_size: dict_size,
137
+ )
138
+ end
139
+ end
140
+ end
141
+ end
142
+ end