omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,238 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "../algorithm"
24
+ require_relative "../models/algorithm_metadata"
25
+
26
+ module Omnizip
27
+ module Algorithms
28
# LZMA (Lempel-Ziv-Markov chain Algorithm) compression
#
# LZMA is a lossless data compression algorithm that combines
# Lempel-Ziv dictionary compression with range coding (a form of
# arithmetic coding), driven by adaptive probability models.
#
# This class is the entry point registered for the :lzma algorithm. It
# only normalises caller-supplied options; the actual work is handed to
# the nested Encoder and Decoder classes, which are required after this
# class body has been defined.
class LZMA < Algorithm
  # Initialize the LZMA algorithm with options
  #
  # @param options [Hash] Algorithm options
  # @option options [Integer] :lc Literal context bits (default: 3)
  # @option options [Integer] :lp Literal position bits (default: 0)
  # @option options [Integer] :pb Position bits (default: 2)
  # @option options [Integer] :dict_size Dictionary size (default: 4MB)
  # @option options [Boolean] :lzma2_mode Raw LZMA mode (no header, for 7-Zip)
  # @option options [Integer] :uncompressed_size Expected size of the
  #   decompressed data; :size is accepted as a synonym
  def initialize(options = {})
    super()
    @lc = options[:lc] || 3
    @lp = options[:lp] || 0
    @pb = options[:pb] || 2
    @dict_size = options[:dict_size] || (4 * 1024 * 1024) # 4 MB default
    @lzma2_mode = options[:lzma2_mode]
    @uncompressed_size = options[:uncompressed_size] || options[:size]
  end

  # Get algorithm metadata
  #
  # @return [AlgorithmMetadata] Algorithm information
  def self.metadata
    Models::AlgorithmMetadata.new.tap do |meta|
      meta.name = "lzma"
      meta.description = "LZMA compression using range coding " \
                         "and dictionary compression"
      meta.version = "1.0.0"
    end
  end

  # Compress data using LZMA algorithm
  #
  # The whole input stream is read into memory before encoding starts.
  # Note that, unlike #decompress, the lc/lp/pb/dict_size values given
  # to #initialize are not used as fallbacks here; only the per-call
  # +options+ reach the encoder.
  #
  # @param input_stream [IO] Input stream to compress
  # @param output_stream [IO] Output stream for compressed data
  # @param options [Models::CompressionOptions, Hash, Integer, nil]
  #   Compression options; a bare Integer is treated as the level
  # @return [Object] Whatever Encoder#encode_stream returns
  def compress(input_stream, output_stream, options = nil)
    input_data = input_stream.read
    encoder = Encoder.new(output_stream, build_encoder_options(options))
    encoder.encode_stream(input_data)
  end

  # Decompress LZMA-compressed data
  #
  # Per-call +options+ win; values given to #initialize fill in anything
  # the call did not specify. Because #initialize always assigns
  # defaults for lc, lp, pb and dict_size, those four keys are always
  # present in the options handed to the decoder.
  #
  # @param input_stream [IO] Input stream of compressed data
  # @param output_stream [IO] Output stream for decompressed data
  # @param options [Models::CompressionOptions, Hash, Integer, nil]
  #   Decompression options
  # @return [IO] The output_stream (for chaining)
  def decompress(input_stream, output_stream, options = nil)
    # Set binary encoding on output stream for proper byte handling
    output_stream.set_encoding(Encoding::BINARY) if output_stream.respond_to?(:set_encoding)

    decoder_opts = build_decoder_options(options)

    # Instance-level values act as fallbacks, never as overrides.
    instance_fallbacks = {
      lzma2_mode: @lzma2_mode,
      lc: @lc,
      lp: @lp,
      pb: @pb,
      dict_size: @dict_size,
      uncompressed_size: @uncompressed_size,
    }
    instance_fallbacks.each do |key, fallback|
      decoder_opts[key] = fallback if fallback && !decoder_opts.key?(key)
    end

    # :size is a synonym for :uncompressed_size when nothing else set it.
    if options.respond_to?(:key?) && options.key?(:size)
      decoder_opts[:uncompressed_size] ||= options[:size]
    end

    decoder = Decoder.new(input_stream, decoder_opts)
    decoder.decode_stream(output_stream)
    output_stream
  end

  private

  # Build encoder options from compression options
  #
  # Hash-like input (anything answering #key?) is copied selectively:
  # lc/lp/pb/dict_size only when truthy, the remaining flags whenever
  # the key is present (so an explicit false is forwarded). If no
  # dictionary size was given, one is derived from the level.
  #
  # The Hash branch is guarded by #key? rather than #[] because the
  # body calls #key?, and because Integer also answers #[] (bit
  # reference) and would raise when indexed with a Symbol.
  #
  # @param options [Models::CompressionOptions, Hash, Integer, nil]
  #   Compression opts; a bare Integer is interpreted as the level
  # @return [Hash] Encoder options
  def build_encoder_options(options)
    return {} if options.nil?

    opts = {}

    if options.respond_to?(:key?)
      %i[lc lp pb dict_size].each do |name|
        value = options[name]
        opts[name] = value if value
      end

      %i[write_size sdk_compatible xz_compatible raw_mode].each do |name|
        opts[name] = options[name] if options.key?(name)
      end
    end

    # Level may come from an options object, a Hash or a bare Integer.
    level = if options.respond_to?(:level)
              options.level || 5
            elsif options.is_a?(Integer)
              options
            elsif options.respond_to?(:key?)
              options[:level] || 5
            else
              5
            end

    opts[:dict_size] ||= dictionary_size_for_level(level)

    opts
  end

  # Build decoder options from decompression options
  #
  # Returns an empty Hash for nil, for an Integer (callers sometimes
  # pass the uncompressed size positionally; it is not forwarded from
  # here) and for anything that does not answer #key?. Otherwise every
  # decoder-relevant key that is present is copied as-is.
  #
  # @param options [Models::CompressionOptions, Hash, Integer, nil]
  #   Decompression opts
  # @return [Hash] Decoder options
  def build_decoder_options(options)
    return {} if options.nil? || options.is_a?(Integer)
    return {} unless options.respond_to?(:key?)

    passthrough = %i[
      sdk_compatible lzma2_mode lc lp pb dict_size uncompressed_size size
    ]
    passthrough.each_with_object({}) do |name, opts|
      opts[name] = options[name] if options.key?(name)
    end
  end

  # Get dictionary size based on compression level
  #
  # Anything outside 0..7 (including nil) maps to the largest size.
  #
  # @param level [Integer] Compression level (0-9)
  # @return [Integer] Dictionary size in bytes
  def dictionary_size_for_level(level)
    exponent = case level
               when 0..1 then 16 # 64KB
               when 2..3 then 20 # 1MB
               when 4..5 then 22 # 4MB
               when 6..7 then 23 # 8MB
               else 24 # 16MB
               end
    1 << exponent
  end
end
197
+ end
198
+ end
199
+
200
+ # Load nested classes after LZMA class is defined
201
+ require_relative "lzma/constants"
202
+ require_relative "lzma/bit_model"
203
+ require_relative "lzma/probability_models"
204
+ require_relative "lzma/xz_range_encoder"
205
+ require_relative "lzma/dictionary"
206
+ require_relative "lzma/lzma_state"
207
+ require_relative "lzma/range_coder"
208
+ require_relative "lzma/range_encoder"
209
+ require_relative "lzma/range_decoder"
210
+ require_relative "lzma/match"
211
+ require_relative "lzma/match_finder"
212
+ require_relative "lzma/optimal_encoder"
213
+ require_relative "lzma/state"
214
+ require_relative "lzma/xz_state"
215
+ require_relative "lzma/xz_probability_models"
216
+ require_relative "lzma/xz_price_calculator"
217
+ require_relative "lzma/xz_match_finder_adapter"
218
+ require_relative "../implementations/seven_zip/lzma/state_machine"
219
+ require_relative "lzma/length_coder"
220
+ require_relative "lzma/distance_coder"
221
+ require_relative "lzma/literal_encoder"
222
+ require_relative "lzma/literal_decoder"
223
+ require_relative "lzma/match_finder_config"
224
+ require_relative "../implementations/seven_zip/lzma/match_finder"
225
+ require_relative "lzma/match_finder_factory"
226
+ require_relative "../implementations/seven_zip/lzma/encoder"
227
+ require_relative "lzma/xz_encoder"
228
+ require_relative "lzma/encoder"
229
+ require_relative "lzma/decoder"
230
+ require_relative "lzma/xz_utils_decoder"
231
+
232
+ # LZMA container format decoders (DIFFERENT from XZ format!)
233
+ # These are standalone formats that use LZMA1 compression
234
+ require_relative "lzma/lzma_alone_decoder" # .lzma (LZMA_Alone) format
235
+ require_relative "lzma/lzip_decoder" # .lz (lzip) format
236
+
237
+ # Register the LZMA algorithm
238
+ Omnizip::AlgorithmRegistry.register(:lzma, Omnizip::Algorithms::LZMA)
@@ -0,0 +1,182 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "constants"
24
+
25
+ module Omnizip
26
+ module Algorithms
27
+ class LZMA2
28
# LZMA2 Chunk Manager - handles chunk boundaries and decisions
#
# This class is responsible for:
# - Splitting data into chunks of appropriate size
# - Deciding whether to compress or store each chunk uncompressed,
#   based on a compression ratio threshold
#
# IMPORTANT: LZMA2 format limits uncompressed chunk size to 65536 bytes
# due to 2-byte size encoding. This manager enforces that limit by
# capping the requested chunk size at MAX_CHUNK_BYTES.
class ChunkManager
  include Constants

  # @return [Integer] the chunk size requested by the caller (validated,
  #   but not yet capped to MAX_CHUNK_BYTES)
  attr_reader :chunk_size

  # Maximum bytes per chunk (LZMA2 spec limit)
  MAX_CHUNK_BYTES = UNCOMPRESSED_SIZE_MAX + 1 # 65536 bytes

  # Chunk data model
  #
  # Holds the raw bytes of one chunk and, once assigned, their
  # compressed representation.
  class Chunk
    attr_reader :data, :compressed_data, :is_compressed

    # Initialize a chunk
    #
    # @param data [String] Uncompressed chunk data
    def initialize(data)
      @data = data
      @compressed_data = nil
      @is_compressed = false
    end

    # Set compressed data
    #
    # Assigning nil clears the compressed payload and returns the chunk
    # to "stored" mode, so #output_data never yields nil.
    #
    # @param compressed [String, nil] Compressed data
    # @return [void]
    def compressed_data=(compressed)
      @compressed_data = compressed
      @is_compressed = !compressed.nil?
    end

    # Get the data to write (compressed or uncompressed)
    #
    # @return [String] Data to write
    def output_data
      @is_compressed ? @compressed_data : @data
    end

    # Get size of output data
    #
    # @return [Integer] Size in bytes
    def output_size
      output_data.bytesize
    end

    # Get uncompressed size
    #
    # @return [Integer] Size in bytes
    def uncompressed_size
      @data.bytesize
    end
  end

  # Initialize chunk manager
  #
  # @param chunk_size [Integer] Desired chunk size
  # @raise [ArgumentError] If chunk_size is outside
  #   CHUNK_SIZE_MIN..CHUNK_SIZE_MAX
  def initialize(chunk_size = CHUNK_SIZE_DEFAULT)
    @chunk_size = validate_chunk_size(chunk_size)
    # Enforce LZMA2 format limit
    @effective_chunk_size = [@chunk_size, MAX_CHUNK_BYTES].min
  end

  # Split data into chunks
  #
  # Slices by byte offset; the final chunk may be shorter than the
  # effective chunk size. Empty input yields an empty array.
  #
  # @param data [String] Data to split
  # @return [Array<Chunk>] Array of chunks
  def create_chunks(data)
    total = data.bytesize
    chunks = []
    offset = 0

    until offset >= total
      chunks << Chunk.new(data.byteslice(offset, @effective_chunk_size))
      offset += @effective_chunk_size
    end

    chunks
  end

  # Decide if chunk should be compressed
  #
  # Compression is only worthwhile when the compressed payload is
  # smaller than the raw data by the margin COMPRESSION_THRESHOLD
  # demands; otherwise the chunk should be stored uncompressed.
  #
  # @param chunk [Chunk] Chunk with compressed data set
  # @return [Boolean] True if should use compression
  def should_compress?(chunk)
    compressed = chunk.compressed_data
    return false unless compressed

    # Measure the compressed payload directly so the answer does not
    # depend on which representation the chunk currently outputs.
    ratio = compressed.bytesize.to_f / chunk.uncompressed_size

    ratio < COMPRESSION_THRESHOLD
  end

  # Decide if chunk is last chunk
  #
  # @param chunk_index [Integer] Current chunk index (zero-based)
  # @param total_chunks [Integer] Total number of chunks
  # @return [Boolean] True if last chunk
  def last_chunk?(chunk_index, total_chunks)
    chunk_index == total_chunks - 1
  end

  # Calculate optimal chunk size for data
  #
  # Inputs up to twice the effective chunk size keep that size; larger
  # inputs get double the effective size, capped at MAX_CHUNK_BYTES.
  #
  # @param data_size [Integer] Total data size
  # @return [Integer] Optimal chunk size
  def optimal_chunk_size(data_size)
    doubled = @effective_chunk_size * 2
    return @effective_chunk_size if data_size <= doubled

    [doubled, MAX_CHUNK_BYTES].min
  end

  private

  # Validate chunk size
  #
  # @param size [Integer] Chunk size to validate
  # @return [Integer] Validated size
  # @raise [ArgumentError] If size is invalid
  def validate_chunk_size(size)
    return size if size.between?(CHUNK_SIZE_MIN, CHUNK_SIZE_MAX)

    raise ArgumentError,
          "Chunk size must be between #{CHUNK_SIZE_MIN} " \
          "and #{CHUNK_SIZE_MAX}"
  end
end
180
+ end
181
+ end
182
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Algorithms
5
# Numeric constants describing the LZMA2 chunk format
# (per the original header: taken from the XZ Utils specification).
module LZMA2Const
  # --- Size limits -------------------------------------------------

  # Largest compressed payload a single chunk may carry, not counting
  # the chunk header.
  CHUNK_MAX = 1 << 16 # 65536 bytes (64 KB)

  # Largest uncompressed payload a single chunk may describe. The
  # 16-bit header field stores size - 1, hence 0xFFFF + 1 bytes.
  # NOTE(review): XZ Utils itself appears to allow 1 << 21 for LZMA
  # chunks; confirm the 64 KB cap here is intentional.
  UNCOMPRESSED_MAX = 1 << 16 # 65536 bytes (64 KB)

  # Longest possible LZMA2 chunk header, in bytes.
  HEADER_MAX = 6

  # Header length of an uncompressed chunk, in bytes.
  HEADER_UNCOMPRESSED = 3

  # --- Control byte values -----------------------------------------

  CONTROL_END = 0x00 # End of stream marker
  CONTROL_UNCOMPRESSED_RESET = 0x01 # Uncompressed with dict reset
  CONTROL_UNCOMPRESSED = 0x02 # Uncompressed without reset
  CONTROL_LZMA_MIN = 0x80 # Minimum LZMA control byte

  # --- Control byte flags (LZMA chunks only) -----------------------

  FLAG_UNCOMPRESSED_SIZE = 0x80 # Base flag for LZMA chunks
  FLAG_RESET_STATE = 0x20 # Reset LZMA state
  FLAG_RESET_PROPERTIES = 0x40 # Reset properties + state
  FLAG_RESET_DICT = 0x60 # Reset dict + properties + state

  # --- Dictionary size bounds --------------------------------------

  DICT_SIZE_MIN = 1 << 12 # 4096 bytes (4 KB) minimum
  DICT_SIZE_MAX = (1 << 32) - 1 # 0xFFFFFFFF, one byte short of 4 GB
end

# Older name kept so existing references keep resolving.
LZMA2Constants = LZMA2Const
40
+ end
41
+ end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+ require_relative "properties"
5
+ require_relative "simple_lzma2_encoder"
6
+
7
+ module Omnizip
8
+ module Algorithms
9
+ # LZMA2 encoder - delegates to XzLZMA2Encoder
10
+ #
11
+ # This class provides a backward-compatible API that delegates to the
12
+ # complete XzLZMA2Encoder implementation ported from XZ Utils.
13
+ #
14
+ # Based on XZ Utils lzma2_encoder.c
15
+ class LZMA2Encoder
16
+ attr_reader :dict_size, :lc, :lp, :pb
17
+
18
+ # Initialize the encoder
19
+ #
20
+ # @param dict_size [Integer] Dictionary size (default: 8MB)
21
+ # @param lc [Integer] Literal context bits (default: 3)
22
+ # @param lp [Integer] Literal position bits (default: 0)
23
+ # @param pb [Integer] Position bits (default: 2)
24
+ # @param standalone [Boolean] If true, write property byte for
25
+ # standalone LZMA2 files (default: false)
26
+ def initialize(
27
+ dict_size: 8 * 1024 * 1024,
28
+ lc: 3,
29
+ lp: 0,
30
+ pb: 2,
31
+ standalone: false,
32
+ **
33
+ )
34
+ @dict_size = dict_size
35
+ @lc = lc
36
+ @lp = lp
37
+ @pb = pb
38
+ @standalone = standalone
39
+
40
+ # Create the SimpleLZMA2Encoder (uses working XzEncoder internally)
41
+ @encoder = LZMA2::SimpleLZMA2Encoder.new(
42
+ dict_size: dict_size,
43
+ lc: lc,
44
+ lp: lp,
45
+ pb: pb,
46
+ standalone: standalone,
47
+ )
48
+ end
49
+
50
+ # Encode data into LZMA2 format
51
+ #
52
+ # @param input [String] Input data to compress
53
+ # @return [String] LZMA2 compressed data
54
+ def encode(input)
55
+ @encoder.encode(input)
56
+ end
57
+
58
+ # Compress data from input stream to output stream
59
+ # This method provides compatibility with the AlgorithmRegistry interface
60
+ #
61
+ # @param input_io [IO] Input stream to read from
62
+ # @param output_io [IO] Output stream to write to
63
+ # @param level [Integer] Compression level (not used, kept for compatibility)
64
+ # @return [Integer] Number of bytes written
65
+ def compress(input_io, output_io, _level = nil)
66
+ input_data = input_io.read
67
+ compressed = encode(input_data)
68
+ output_io.write(compressed)
69
+ compressed.bytesize
70
+ end
71
+
72
+ # Decompress data from input stream to output stream
73
+ # This method provides compatibility with the AlgorithmRegistry interface
74
+ #
75
+ # @param input_io [IO] Input stream to read from
76
+ # @param output_io [IO] Output stream to write to
77
+ # @param size [Integer] Expected uncompressed size (optional)
78
+ # @return [Integer] Number of bytes written
79
+ def decompress(input_io, output_io, _size = nil)
80
+ # Check if this is being called for 7-Zip format (raw LZMA2 stream)
81
+ # 7-Zip stores LZMA2 without a property byte
82
+ # We can detect this by checking if input_io is a StringIO (which is used
83
+ # by StreamDecompressor for 7-Zip format)
84
+ raw_mode = input_io.is_a?(StringIO)
85
+
86
+ # Create a decoder instance
87
+ decoder = LZMA2::Decoder.new(input_io, raw_mode: raw_mode)
88
+
89
+ # For raw_mode (7-Zip format), we need to provide dict_size
90
+ # Use default 8MB if not specified
91
+ if raw_mode
92
+ # Re-create decoder with dict_size option
93
+ decoder = LZMA2::Decoder.new(input_io,
94
+ raw_mode: true,
95
+ dict_size: @dict_size)
96
+ end
97
+
98
+ # Decode the stream
99
+ result = decoder.decode_stream
100
+
101
+ # Write to output
102
+ output_io.write(result)
103
+
104
+ result.bytesize
105
+ end
106
+
107
+ # Encode dictionary size for LZMA2 properties
108
+ # Returns a single byte encoding the dictionary size
109
+ #
110
+ # @param dict_size [Integer] Dictionary size to encode
111
+ # @return [Integer] Encoded dictionary size byte
112
+ def self.encode_dict_size(dict_size)
113
+ # LZMA2 dictionary size encoding (XZ Utils format)
114
+ # Byte value d encodes dictionary size as:
115
+ # If d < 40: size = 2^((d/2) + 12) (for even d)
116
+ # or size = 3 * 2^((d-1)/2 + 11) (for odd d)
117
+ # If d == 40: size = 0xFFFFFFFF (4GB - 1)
118
+
119
+ # Clamp to valid range
120
+ d = [dict_size, LZMA2Constants::DICT_SIZE_MIN].max
121
+
122
+ # For 8MB (8 * 1024 * 1024 = 8388608 = 2^23):
123
+ # We want: 2^((d/2) + 12) = 2^23
124
+ # So: (d/2) + 12 = 23
125
+ # Therefore: d/2 = 11, d = 22
126
+
127
+ # Calculate log2 of dict_size
128
+ log2_size = 0
129
+ temp = d
130
+ while temp > 1
131
+ log2_size += 1
132
+ temp >>= 1
133
+ end
134
+
135
+ # Encoding formula for power-of-2 sizes:
136
+ # d = 2 * (log2_size - 12)
137
+ if d == (1 << log2_size)
138
+ # Exact power of 2
139
+ [(log2_size - 12) * 2, 40].min
140
+ else
141
+ # Between 2^n and 2^n + 2^(n-1), use odd encoding
142
+ [((log2_size - 12) * 2) + 1, 40].min
143
+ end
144
+ end
145
+ end
146
+ end
147
+ end