omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,142 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+
5
+ module Omnizip
6
+ module Algorithms
7
+ class Deflate64
8
+ # LZ77 encoder with 64KB sliding window for Deflate64
9
+ class LZ77Encoder
10
+ include Constants
11
+
12
+ attr_reader :window_size
13
+
14
# Set up an encoder with empty match-finding state.
#
# @param window_size [Integer] Maximum number of bytes kept in the
#   sliding window (defaults to DICTIONARY_SIZE)
def initialize(window_size = DICTIONARY_SIZE)
  # Start position, hash chains and window contents are all empty/zero.
  @position = 0
  @hash_table = {}
  @window = []
  @window_size = window_size
end
20
+
21
# Tokenize data into literals and back-references.
#
# Every call starts from clean match-finding state. The hash table
# stores byte offsets into the buffer it was built from; carrying those
# offsets into a call with another buffer (or the same buffer again)
# could produce candidates at or after the current position and thus
# match tokens with a zero or negative distance.
#
# @param data [String] Input data to compress
# @return [Array<Hash>] Tokens in input order, each either
#   { type: :literal, value: Integer } or
#   { type: :match, length: Integer, distance: Integer }
def find_matches(data)
  # Discard anything left over from a previous call.
  @hash_table = {}
  @window = []
  @position = 0

  tokens = []
  pos = 0

  while pos < data.bytesize
    match = find_longest_match(pos, data)

    if match && match[:length] >= MIN_MATCH_LENGTH
      tokens << {
        type: :match,
        length: match[:length],
        distance: match[:distance],
      }
      pos += match[:length]
    else
      tokens << {
        type: :literal,
        value: data.getbyte(pos),
      }
      pos += 1
    end

    # pos already points past the bytes consumed by this token.
    update_window(data, pos)
  end

  tokens
end
52
+
53
+ private
54
+
55
# Find the longest earlier occurrence of the bytes starting at pos.
#
# Looks up the hash chain for the MIN_MATCH_LENGTH bytes at pos, examines
# at most MAX_CHAIN_LENGTH of the most recently recorded positions
# (newest first), and finally records pos in that chain.
#
# @param pos [Integer] Current position in data
# @param data [String] Input data
# @return [Hash, nil] { length:, distance: } for the best candidate that
#   is at least MIN_MATCH_LENGTH long, or nil when there is none
def find_longest_match(pos, data)
  return nil if pos + MIN_MATCH_LENGTH > data.bytesize

  best_match = nil
  best_length = MIN_MATCH_LENGTH - 1

  hash = calculate_hash(data, pos)
  chain = (@hash_table[hash] ||= [])

  # last(n).reverse_each visits the same candidates, in the same order, as
  # reverse.take(n) but without copying the entire chain on every lookup.
  chain.last(MAX_CHAIN_LENGTH).reverse_each do |candidate_pos|
    distance = pos - candidate_pos
    # A back-reference must point strictly backwards; entries at or after
    # pos (e.g. left over from another buffer) are not usable.
    next unless distance.positive?
    break if distance > MAX_DISTANCE

    length = match_length(data, pos, candidate_pos)
    next unless length > best_length

    best_length = length
    best_match = {
      length: length,
      distance: distance,
    }

    # Good enough; stop walking the chain.
    break if length >= NICE_MATCH
  end

  # Record the current position for later lookups.
  chain << pos

  best_match
end
95
+
96
# Count how many consecutive bytes agree when reading forward from two
# positions in the same buffer.
#
# The count is capped at MAX_MATCH_LENGTH and at the number of bytes
# remaining after pos1.
#
# @param data [String] Input data
# @param pos1 [Integer] First position
# @param pos2 [Integer] Second position
# @return [Integer] Match length
def match_length(data, pos1, pos2)
  limit = [MAX_MATCH_LENGTH, data.bytesize - pos1].min
  matched = 0

  until matched >= limit ||
        data.getbyte(pos1 + matched) != data.getbyte(pos2 + matched)
    matched += 1
  end

  matched
end
113
+
114
+ # Calculate hash value for position
115
+ #
116
+ # @param data [String] Input data
117
+ # @param pos [Integer] Position to hash
118
+ # @return [Integer] Hash value
119
+ def calculate_hash(data, pos)
120
+ return 0 if pos + MIN_MATCH_LENGTH > data.bytesize
121
+
122
+ hash = 0
123
+ MIN_MATCH_LENGTH.times do |i|
124
+ hash = ((hash << HASH_SHIFT) ^
125
+ data.getbyte(pos + i)) & (HASH_SIZE - 1)
126
+ end
127
+ hash
128
+ end
129
+
130
# Advance the sliding-window bookkeeping to a new position.
#
# Appends the single byte immediately before pos (when pos is positive),
# drops the oldest byte if the window then exceeds @window_size, and
# remembers pos as the current position.
# NOTE(review): only one byte is appended per call, even when the caller
# advanced by a whole match; nothing visible here reads @window, so
# confirm the intended contents before relying on it.
#
# @param data [String] Input data
# @param pos [Integer] Current position
def update_window(data, pos)
  if pos > 0
    @window.push(data.getbyte(pos - 1))
  end
  @window.shift if @window.length > @window_size
  @position = pos
end
139
+ end
140
+ end
141
+ end
142
+ end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../algorithm"
4
+ require "zlib"
5
+
6
+ module Omnizip
7
+ module Algorithms
8
+ # Deflate64 (Enhanced Deflate) compression algorithm
9
+ #
10
+ # Extends standard Deflate with:
11
+ # - 64KB sliding window (vs 32KB)
12
+ # - Better compression for large files
13
+ # - ZIP compression method 9
14
+ #
15
+ # NOTE: This is a simplified implementation that uses standard
16
+ # Deflate internally, as true Deflate64 requires complex
17
+ # bit-level manipulation that is better handled by libraries
18
+ # specifically designed for it.
19
class Deflate64 < Algorithm
  # Window size advertised for Deflate64 (64KB)
  DICTIONARY_SIZE = 65_536

  # Algorithm metadata
  #
  # @return [Hash] Static description of the algorithm (name, type,
  #   streaming support, dictionary size, ZIP method ID, description)
  def self.metadata
    {
      name: "Deflate64",
      type: :compression,
      streaming_supported: true,
      dictionary_size: DICTIONARY_SIZE,
      compression_method: 9,
      description: "Enhanced Deflate with 64KB window",
    }
  end

  # Compress input stream to output stream
  #
  # The whole input is read into memory and compressed in one call.
  # Nothing is written when the input is empty.
  #
  # @param input [IO] Input stream
  # @param output [IO] Output stream
  # @param options [Hash] Compression options
  # @option options [Integer] :level Compression level (1-9); defaults
  #   to Zlib::DEFAULT_COMPRESSION
  # @return [Integer, nil] Result of output.write, or nil for empty input
  def compress(input, output, options = {})
    level = options[:level] || Zlib::DEFAULT_COMPRESSION

    data = input.read
    return if data.nil? || data.empty?

    # Zlib::MAX_WBITS is zlib's largest window (32KB) and a positive
    # value produces a zlib-wrapped stream, so this is standard Deflate
    # rather than a true 64KB-window Deflate64 bitstream.
    deflater = Zlib::Deflate.new(
      level,
      Zlib::MAX_WBITS,
      Zlib::MAX_MEM_LEVEL,
    )

    # Always release the native zlib stream, even if deflate raises.
    compressed = begin
      deflater.deflate(data, Zlib::FINISH)
    ensure
      deflater.close
    end

    output.write(compressed)
  end

  # Decompress input stream to output stream
  #
  # The whole input is read into memory and inflated in one call.
  # Nothing is written when the input is empty.
  #
  # @param input [IO] Input stream
  # @param output [IO] Output stream
  # @param _options [Hash] Decompression options (currently unused)
  # @return [Integer, nil] Result of output.write, or nil for empty input
  def decompress(input, output, _options = {})
    compressed = input.read
    return if compressed.nil? || compressed.empty?

    # Switch the sink to binary where it supports it (e.g. StringIO, File)
    output.set_encoding(Encoding::BINARY) if output.respond_to?(:set_encoding)
    output.binmode if output.respond_to?(:binmode)

    # Same window setting as #compress, so it accepts what #compress emits.
    inflater = Zlib::Inflate.new(Zlib::MAX_WBITS)

    # Always release the native zlib stream, even on corrupt input.
    decompressed = begin
      inflater.inflate(compressed)
    ensure
      inflater.close
    end

    # Force binary encoding to match original data
    decompressed.force_encoding(Encoding::BINARY)

    output.write(decompressed)
  end

  # Check if streaming is supported
  #
  # @return [Boolean] Always true for Deflate64
  def self.streaming_supported?
    true
  end

  # Get dictionary size
  #
  # @return [Integer] 64KB
  def self.dictionary_size
    DICTIONARY_SIZE
  end

  # Get compression method ID for ZIP format
  #
  # @return [Integer] Method 9
  def self.compression_method
    9
  end
end
105
+ end
106
+ end
107
+
108
+ # Register algorithm
109
+ Omnizip::AlgorithmRegistry.register(:deflate64, Omnizip::Algorithms::Deflate64)
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ module Omnizip
24
+ module Algorithms
25
+ class LZMA < Algorithm
26
+ # Adaptive probability model for range coding
27
+ #
28
+ # This class manages probability states for individual bits in the
29
+ # range coder. It uses adaptive arithmetic coding where probabilities
30
+ # are updated based on actual bit values encountered during encoding
31
+ # or decoding.
32
+ #
33
+ # Ported from XZ Utils range_encoder.c probability model implementation.
34
class BitModel
  BIT_MODEL_TOTAL = 0x800 # XZ Utils RC_BIT_MODEL_TOTAL = 2048
  MAX_PROB = 1 << 11      # 2048, same scale as BIT_MODEL_TOTAL
  PROB_INIT = 1024        # Half of the 11-bit scale (probability 0.5)
  MOVE_BITS = 5           # Right shift applied to each adaptation step

  attr_reader :probability

  # Build a model, optionally seeded with a specific probability.
  #
  # @param initial_prob [Integer] Starting probability (default: PROB_INIT)
  def initialize(initial_prob = PROB_INIT)
    @probability = initial_prob
  end

  # Adapt the model to a bit that was just coded.
  #
  # A 0 bit moves the stored probability up toward BIT_MODEL_TOTAL; a
  # 1 bit moves it down toward zero. Each step is the remaining distance
  # shifted right by MOVE_BITS, so a smaller MOVE_BITS adapts faster.
  #
  # XZ Utils formula:
  #   bit 0: prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS
  #   bit 1: prob -= prob >> RC_MOVE_BITS
  #
  # @param bit [Integer] The bit value that occurred (0 or 1)
  # @return [Integer] The updated probability
  def update(bit)
    step = if bit.zero?
             (BIT_MODEL_TOTAL - @probability) >> MOVE_BITS
           else
             -(@probability >> MOVE_BITS)
           end

    @probability += step
  end

  # @deprecated Use {update} instead (identical behaviour)
  def update!(bit)
    update(bit)
  end

  # Put the model back to the default starting probability.
  #
  # @return [Integer] PROB_INIT
  def reset
    @probability = PROB_INIT
  end

  # Probability assigned to a 0 bit.
  #
  # @return [Integer] Value on the 0..MAX_PROB scale
  def prob_0
    @probability
  end

  # Probability assigned to a 1 bit (complement of {prob_0}).
  #
  # @return [Integer] Value on the 0..MAX_PROB scale
  def prob_1
    MAX_PROB - @probability
  end

  # Produce an independent model carrying the same probability.
  #
  # @return [BitModel] New instance seeded with the current probability
  def dup
    BitModel.new(@probability)
  end

  # Probability in the 11-bit form consumed by the range coder.
  #
  # @return [Integer] Value on the 0..MAX_PROB scale
  def to_range
    @probability
  end
end
118
+ end
119
+ end
120
+ end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ module Omnizip
24
+ module Algorithms
25
+ class LZMA
26
+ # LZMA algorithm constants
27
+ #
28
+ # This module contains all constants used by the LZMA algorithm,
29
+ # including range coding parameters, probability models, and
30
+ # compression limits.
31
module Constants
  # --- Range coder ---

  # Range normalization threshold (2^24)
  TOP = 1 << 24

  # Full scale of a bit-model probability (2^11)
  BIT_MODEL_TOTAL = 1 << 11

  # Shift applied when a probability is adapted
  MOVE_BITS = 5

  # Starting probability: half of the full scale
  INIT_PROBS = BIT_MODEL_TOTAL / 2

  # Bit count used for direct (unmodelled) bit encoding
  NUM_DIRECT_BITS = 8

  # --- LZMA state ---

  # Upper bound for literal position bits (lp)
  NUM_LIT_POS_BITS_MAX = 4

  # Upper bound for literal context bits (lc)
  NUM_LIT_CONTEXT_BITS_MAX = 8

  # Upper bound for position bits (pb)
  NUM_POS_BITS_MAX = 4

  # Size of the LZMA state machine
  NUM_STATES = 12

  # --- Dictionary size limits ---
  DICT_SIZE_MIN = 4 * 1024  # 4KB
  DICT_SIZE_MAX = 1024**3   # 1GB

  # --- Match length bounds ---
  MATCH_LEN_MIN = 2
  MATCH_LEN_MAX = 273

  # Count of distance slots
  NUM_DIST_SLOTS = 64

  # Largest number of position states
  POS_STATES_MAX = 2**NUM_POS_BITS_MAX

  # Largest literal coder size
  LIT_SIZE_MAX = 2**(NUM_LIT_POS_BITS_MAX + NUM_LIT_CONTEXT_BITS_MAX)

  # Count of length-to-position states
  NUM_LEN_TO_POS_STATES = 4

  # --- Compression levels ---
  COMPRESSION_LEVEL_MIN = 0
  COMPRESSION_LEVEL_MAX = 9
  COMPRESSION_LEVEL_DEFAULT = 5

  # End of stream marker
  EOS_MARKER = true

  # --- SDK-specific length encoding ---
  NUM_LEN_LOW_BITS = 3
  NUM_LEN_MID_BITS = 3
  NUM_LEN_HIGH_BITS = 8
  LEN_LOW_SYMBOLS = 2**NUM_LEN_LOW_BITS
  LEN_MID_SYMBOLS = 2**NUM_LEN_MID_BITS
  LEN_HIGH_SYMBOLS = 2**NUM_LEN_HIGH_BITS

  # --- Distance encoding ---
  NUM_DIST_SLOT_BITS = 6
  DIST_ALIGN_BITS = 4
  DIST_ALIGN_SIZE = 2**DIST_ALIGN_BITS
  START_POS_MODEL_INDEX = 4
  END_POS_MODEL_INDEX = 14
  NUM_FULL_DISTANCES = 2**(END_POS_MODEL_INDEX / 2)

  # Helper limit for distance slot calculation
  DIST_SLOT_FAST_LIMIT = 2**(NUM_DIST_SLOT_BITS + 1)
end
110
+ end
111
+ end
112
+ end
@@ -0,0 +1,148 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "xz_utils_decoder"
24
+
25
+ module Omnizip
26
+ module Algorithms
27
+ class LZMA < Algorithm
28
+ # LZMA Decoder - Factory for LZMA decompression implementations
29
+ #
30
+ # This class provides a unified interface for LZMA decoding, delegating
31
+ # to the XZ Utils implementation for full compatibility.
32
+ #
33
+ # The decoder reads a stream that consists of:
34
+ # - Property byte (lc, lp, pb parameters)
35
+ # - Dictionary size (4 bytes)
36
+ # - Uncompressed size (8 bytes)
37
+ # - Compressed data
38
class Decoder
  attr_reader :dict_size, :lc, :lp, :pb, :uncompressed_size

  # Build a decoder around the XZ Utils implementation.
  #
  # Header values reported by the implementation are copied onto this
  # object so existing callers can keep reading them from here.
  #
  # @param input [IO] Input stream of compressed data
  # @param options [Hash] Decoding options
  # @option options [Boolean] :raw_mode Skip header parsing for raw LZMA (for LZMA2)
  # @option options [Integer] :dict_size Dictionary size for raw mode
  def initialize(input, options = {})
    @impl = XzUtilsDecoder.new(input, options)

    refresh_coder_params
    @dict_size = @impl.dict_size
    @uncompressed_size = @impl.uncompressed_size
  end

  # Decode a compressed stream.
  #
  # @param output [IO, nil] Optional output stream (if nil, returns String)
  # @param preserve_dict [Boolean] Whether to preserve dictionary from previous decode
  # @return [String, Integer] Decompressed data or bytes written
  def decode_stream(output = nil, preserve_dict: false)
    @impl.decode_stream(output, preserve_dict: preserve_dict)
  end

  # Reset the decoder so it can be reused, optionally with new properties.
  #
  # Used by the LZMA2 decoder for multi-chunk streams. The cached
  # lc/lp/pb readers are refreshed from the implementation afterwards.
  #
  # @param new_lc [Integer, nil] New lc value (if nil, keeps current)
  # @param new_lp [Integer, nil] New lp value (if nil, keeps current)
  # @param new_pb [Integer, nil] New pb value (if nil, keeps current)
  # @param preserve_dict [Boolean] If true, preserve dictionary state (pos, dict_full)
  # @return [void]
  def reset(new_lc: nil, new_lp: nil, new_pb: nil, preserve_dict: false)
    reset_args = {
      new_lc: new_lc,
      new_lp: new_lp,
      new_pb: new_pb,
      preserve_dict: preserve_dict,
    }
    @impl.reset(**reset_args)

    refresh_coder_params
  end

  # Reset state machine and rep distances while keeping probability models.
  #
  # Used by the LZMA2 decoder for multi-chunk streams.
  #
  # @return [void]
  def reset_state_only
    @impl.reset_state_only
  end

  # First half of a state reset: call BEFORE supplying the new input.
  #
  # Used by the LZMA2 decoder for multi-chunk streams.
  #
  # @return [void]
  def prepare_state_reset
    @impl.prepare_state_reset
  end

  # Reset the state machine only, keeping rep distances.
  #
  # Used by the LZMA2 decoder for multi-chunk streams where the state
  # machine restarts but rep distances carry over from the previous
  # chunk (control >= 0xA0 but < 0xC0).
  #
  # @return [void]
  def reset_state_machine_only
    @impl.reset_state_machine_only
  end

  # Second half of a state reset: call AFTER supplying the new input.
  #
  # Used by the LZMA2 decoder for multi-chunk streams.
  #
  # @return [void]
  def finish_state_reset
    @impl.finish_state_reset
  end

  # Point the decoder at a new input stream for chunked decoding.
  #
  # Used by the LZMA2 decoder for multi-chunk streams.
  #
  # @param new_input [IO] New input stream
  # @return [void]
  def set_input(new_input)
    @impl.set_input(new_input)
  end

  # Tell the decoder how many uncompressed bytes the next chunk holds.
  #
  # Used by the LZMA2 decoder for multi-chunk streams. Only the
  # implementation is updated; the cached #uncompressed_size reader on
  # this object is left as it was.
  #
  # @param size [Integer] Uncompressed size
  # @param allow_eopm [Boolean] Whether to allow end-of-stream marker
  # @return [void]
  def set_uncompressed_size(size, allow_eopm: true)
    @impl.set_uncompressed_size(size, allow_eopm: allow_eopm)
  end

  private

  # Copy the implementation's current lc/lp/pb onto this object.
  def refresh_coder_params
    @lc = @impl.lc
    @lp = @impl.lp
    @pb = @impl.pb
  end
end
146
+ end
147
+ end
148
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Algorithms
5
+ class LZMA < Algorithm
6
+ # Circular buffer dictionary for LZMA sliding window
7
+ # Ported from XZ Utils lzma_decoder.c
8
class Dictionary
  attr_reader :size, :position, :buffer

  # Create an empty dictionary.
  #
  # @param size [Integer] Maximum number of bytes kept in the window
  def initialize(size)
    @size = size
    @buffer = String.new(encoding: Encoding::BINARY)
    @position = 0
  end

  # Append bytes to dictionary
  #
  # Only the most recent +size+ bytes are kept. +position+ counts every
  # byte ever appended, including those already trimmed away.
  #
  # @param data [#each_byte] Bytes to append
  # @return [Object] The +data+ argument, as returned by +each_byte+
  def append(data)
    appended = data.each_byte do |byte|
      @buffer << byte
      @position += 1
    end

    # Trim once per call rather than once per byte: re-slicing a full
    # window after every single byte made large appends quadratic.
    excess = @buffer.bytesize - @size
    @buffer = @buffer.byteslice(excess..-1) if excess.positive?

    appended
  end

  # Read bytes from dictionary at a distance back
  #
  # +distance+ is 1-based: 1 is the most recently appended byte. When
  # +length+ exceeds +distance+ the source overlaps the bytes being
  # produced, so the last +distance+ bytes repeat until +length+ bytes
  # have been emitted (LZ77 overlapping-copy semantics).
  #
  # @param distance [Integer] How far back the copy starts (1..buffer size)
  # @param length [Integer] Number of bytes to produce
  # @return [String] Binary string of +length+ bytes (empty if length <= 0)
  # @raise [RuntimeError] If distance is not within 1..buffer size
  def read_bytes(distance, length)
    if distance < 1 || distance > @buffer.bytesize
      raise "Invalid distance: #{distance}"
    end

    return String.new(encoding: Encoding::BINARY) unless length.positive?

    # The last `distance` bytes are the repeating unit of the match.
    pattern = @buffer.byteslice(@buffer.bytesize - distance, distance)
    return pattern.byteslice(0, length) if length <= distance

    repeats = (length + distance - 1) / distance
    (pattern * repeats).byteslice(0, length)
  end

  # Get byte at distance back
  #
  # @param distance [Integer] 1-based distance from the end of the buffer
  # @return [Integer, nil] Byte value, or nil when the computed index
  #   falls outside the buffer (e.g. distance 0)
  # @raise [RuntimeError] If distance exceeds the buffered byte count
  def get_byte(distance)
    raise "Invalid distance: #{distance}" if distance > @buffer.bytesize

    @buffer.getbyte(@buffer.bytesize - distance)
  end

  # Reset dictionary
  #
  # Empties the buffer in place and rewinds +position+ to zero.
  #
  # @return [Integer] 0
  def reset!
    @buffer.clear
    @position = 0
  end

  # Clone dictionary
  #
  # @return [Dictionary] Copy with its own buffer and the same position
  def clone
    dict = Dictionary.new(@size)
    dict.instance_variable_set(:@buffer, @buffer.dup)
    dict.instance_variable_set(:@position, @position)
    dict
  end
end
67
+ end
68
+ end
69
+ end