omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Algorithms
5
+ class LZMA2 < Algorithm
6
# One LZMA2 chunk plus its binary serialization (built with Array#pack).
#
# A chunk is one of:
# * :end          - the single 0x00 end-marker byte
# * :uncompressed - control byte (1 or 2), 16-bit size, raw payload
# * :compressed   - control byte (0x80..0xFF), 16-bit sizes, an optional
#                   LZMA properties byte, then the LZMA payload
#
# All multi-byte header fields are big-endian and store "size minus one".
class LZMA2Chunk
  TYPE_END = :end
  TYPE_UNCOMPRESSED = :uncompressed
  TYPE_COMPRESSED = :compressed

  # Every chunk type accepted by the constructor.
  VALID_TYPES = [TYPE_END, TYPE_UNCOMPRESSED, TYPE_COMPRESSED].freeze
  # Largest byte count a 16-bit "size minus one" field can describe.
  SIZE_FIELD_16_MAX = 1 << 16
  # Largest byte count the 21-bit uncompressed-size field (5 control-byte
  # bits + 16 header bits) of a compressed chunk can describe.
  SIZE_FIELD_21_MAX = 1 << 21
  private_constant :VALID_TYPES, :SIZE_FIELD_16_MAX, :SIZE_FIELD_21_MAX

  attr_reader :chunk_type, :uncompressed_data, :compressed_data,
              :properties, :control_byte

  # Factory for the end-of-stream marker chunk.
  #
  # @return [LZMA2Chunk] chunk that serializes to a single 0x00 byte
  def self.end_chunk
    new(
      chunk_type: TYPE_END,
      uncompressed_data: "",
      compressed_data: "",
      need_dict_reset: false,
      need_state_reset: false,
      need_props: false,
    )
  end

  # @param chunk_type [Symbol] :end, :uncompressed or :compressed
  # @param need_dict_reset [Boolean] request a dictionary reset
  # @param need_state_reset [Boolean] request an LZMA state reset
  # @param need_props [Boolean] chunk carries a new LZMA properties byte
  # @param uncompressed_data [String] raw bytes the chunk represents
  # @param compressed_data [String] LZMA payload (compressed chunks only)
  # @param properties [Integer, nil] LZMA properties byte
  # @param compressed_size [Integer, nil] size written to the header;
  #   defaults to compressed_data.bytesize
  # @raise [ArgumentError] if chunk_type is not one of the three types
  def initialize(chunk_type:, need_dict_reset:, need_state_reset:,
                 need_props:, uncompressed_data: "",
                 compressed_data: "", properties: nil,
                 compressed_size: nil)
    unless VALID_TYPES.include?(chunk_type)
      raise ArgumentError,
            "Invalid chunk_type: #{chunk_type.inspect}. " \
            "Must be :end, :uncompressed, or :compressed"
    end

    @chunk_type = chunk_type
    @uncompressed_data = uncompressed_data
    @compressed_data = compressed_data
    # For XZ format, compressed_size excludes flush bytes
    # For standalone LZMA2, compressed_size includes all bytes
    @compressed_size = compressed_size || @compressed_data.bytesize
    @properties = properties
    @need_dict_reset = need_dict_reset
    @need_state_reset = need_state_reset
    @need_props = need_props

    # The end marker has no control byte of its own (it *is* 0x00).
    @control_byte = build_control_byte unless chunk_type == TYPE_END
  end

  # Serialize the chunk to its on-disk byte sequence.
  #
  # @return [String] binary chunk
  # @raise [ArgumentError] if a size does not fit its header field, or a
  #   compressed chunk announces properties but none were supplied
  def to_bytes
    case @chunk_type
    when TYPE_END
      [0x00].pack("C")
    when TYPE_UNCOMPRESSED
      serialize_uncompressed
    when TYPE_COMPRESSED
      serialize_compressed
    end
  end

  private

  # Compute the base control byte (without the uncompressed-size bits,
  # which serialize_compressed ORs in later).
  #
  # Compressed chunks: 0x80 + (reset_type << 5), where reset_type is
  #   3 = dict reset + state reset + properties
  #   2 = state reset + properties
  #   1 = state reset only
  #   0 = no reset (no properties)
  # Uncompressed chunks: 1 = dictionary reset, 2 = no dictionary reset.
  def build_control_byte
    case @chunk_type
    when TYPE_COMPRESSED
      control = 0x80 + (compressed_reset_type << 5)

      # DEBUG: Print control byte calculation
      if ENV["DEBUG_CHUNK"]
        warn "LZMA2Chunk build_control_byte: need_props=#{@need_props}, need_dict_reset=#{@need_dict_reset}, need_state_reset=#{@need_state_reset}, control=0x#{control.to_s(16)}"
      end

      control
    when TYPE_UNCOMPRESSED
      @need_dict_reset ? 1 : 2
    end
  end

  # Reset selector (0..3) stored in bits 6-5 of a compressed control byte.
  def compressed_reset_type
    if @need_props
      # New properties always imply at least a state reset, so a request
      # for properties without any reset flag falls back to selector 2.
      @need_dict_reset ? 3 : 2
    elsif @need_state_reset
      1
    else
      0
    end
  end

  # Layout: control (1 byte), size - 1 (2 bytes, big-endian), payload.
  def serialize_uncompressed
    size = size_minus_one(@uncompressed_data.bytesize, SIZE_FIELD_16_MAX,
                          "Uncompressed chunk data")
    [@control_byte, size].pack("Cn") + @uncompressed_data
  end

  # Layout: control | high 5 bits of (uncompressed size - 1),
  # low 16 bits of (uncompressed size - 1), (compressed size - 1),
  # optional properties byte, payload. Multi-byte fields are big-endian.
  def serialize_compressed
    uncomp_size = size_minus_one(@uncompressed_data.bytesize,
                                 SIZE_FIELD_21_MAX,
                                 "Uncompressed size of a compressed chunk")
    comp_size = size_minus_one(@compressed_size, SIZE_FIELD_16_MAX,
                               "Compressed chunk size")

    # Add high 5 bits to control byte
    high_bits = (uncomp_size >> 16) & 0x1F
    control = @control_byte | high_bits

    # DEBUG: Print final control byte calculation
    if ENV["DEBUG_CHUNK"]
      warn "LZMA2Chunk serialize_compressed: @control_byte=0x#{@control_byte.to_s(16)}, high_bits=0x#{high_bits.to_s(16)}, final_control=0x#{control.to_s(16)}"
      warn " uncomp_size=#{@uncompressed_data.bytesize} (uncomp_size-1=#{uncomp_size}), comp_size=#{@compressed_size}"
    end

    header = [control, uncomp_size & 0xFFFF, comp_size].pack("Cnn")
    header + properties_bytes + @compressed_data
  end

  # The properties byte must be present exactly when the control byte
  # announces it (reset selectors 2 and 3, i.e. need_props); otherwise
  # the header and the payload disagree about where the data starts.
  def properties_bytes
    return "" unless @need_props

    if @properties.nil?
      raise ArgumentError,
            "Compressed chunk announces properties but none were given"
    end

    [@properties].pack("C")
  end

  # Validate a byte count against its header field and return the
  # "size minus one" value that is actually stored.
  def size_minus_one(bytes, max, label)
    unless bytes.between?(1, max)
      raise ArgumentError,
            "#{label} must be between 1 and #{max} bytes, got #{bytes}"
    end

    bytes - 1
  end
end
159
+ end
160
+ end
161
+ end
@@ -0,0 +1,179 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "constants"
24
+
25
+ module Omnizip
26
+ module Algorithms
27
+ class LZMA2
28
# LZMA2 Properties - handles dictionary size encoding/decoding
#
# The LZMA2 format uses a single property byte that encodes the
# dictionary size. This is more compact than LZMA's multiple
# property bytes.
#
# Dictionary size encoding formula (property bytes 0..39):
#   dictSize = (2 | (props & 1)) << (props / 2 + 11)
# Property byte 40 is special-cased to 4 GiB - 1 (0xFFFFFFFF).
#
# This gives sizes from 4 KiB (props=0) up to 4 GiB - 1 (props=40).
#
# Note: In XZ format, the LZMA2 filter properties byte contains ONLY
# the dictionary size encoding. The lc/lp/pb parameters are encoded
# in the LZMA chunk properties (inside the compressed data).
class Properties
  include LZMA2Const

  attr_reader :dict_size, :prop_byte

  # Initialize properties from dictionary size
  #
  # @param dict_size [Integer] Dictionary size in bytes
  # @raise [ArgumentError] If the size is outside the accepted range
  def initialize(dict_size)
    @dict_size = validate_dict_size(dict_size)
    @prop_byte = encode_dict_size(@dict_size)
  end

  # Create properties from property byte
  #
  # NOTE(review): the decoded size is passed through the constructor's
  # range check, which caps sizes at (1 << 31) - 1, so the largest
  # property bytes are rejected there even though they pass the 0..40
  # check here - confirm this is intended.
  #
  # @param prop_byte [Integer] Encoded property byte
  # @return [Properties] New properties instance
  # @raise [ArgumentError] If the byte is not in 0..40 or decodes to a
  #   size the constructor rejects
  def self.from_byte(prop_byte)
    validate_prop_byte(prop_byte)
    new(decode_dict_size(prop_byte))
  end

  # Encode dictionary size to property byte
  #
  # Same mapping as Properties.encode_dict_size_to_byte, kept as an
  # instance method for existing callers.
  #
  # @param dict_size [Integer] Dictionary size
  # @return [Integer] Property byte value
  def encode_dict_size(dict_size)
    self.class.encode_dict_size_to_byte(dict_size)
  end

  # Decode property byte to dictionary size
  #
  # Mirrors XZ Utils lzma_lzma2_props_decode:
  #   props == 40 -> UINT32_MAX (4 GiB - 1)
  #   otherwise   -> (2 | (props & 1)) << (props / 2 + 11)
  #
  # For even props: dict_size = 2^((props/2) + 12)
  # For odd props:  dict_size = 3 * 2^((props-1)/2 + 11)
  #
  # @param prop [Integer] Property byte
  # @return [Integer] Dictionary size in bytes
  def self.decode_dict_size(prop)
    # The shift formula would yield 2^32 for 40, which does not fit the
    # 32-bit size field; the format defines 40 as the largest 32-bit value.
    return 0xFFFF_FFFF if prop == 40

    (2 | (prop & 1)) << ((prop / 2) + 11)
  end

  # Encode a dictionary size to the LZMA2 property byte
  #
  # Only the dictionary size is encoded; the lc/lp/pb arguments are
  # accepted for signature compatibility and ignored, because those
  # parameters travel in the LZMA chunk properties instead.
  #
  # @param dict_size [Integer] Dictionary size
  # @param _lc [Integer] Literal context bits (ignored)
  # @param _lp [Integer] Literal position bits (ignored)
  # @param _pb [Integer] Position bits (ignored)
  # @return [Integer] Property byte value
  # @raise [ArgumentError] If dict_size is outside
  #   DICT_SIZE_MIN..DICT_SIZE_MAX
  def self.encode(dict_size, _lc = 3, _lp = 0, _pb = 2)
    validate_prop_byte_range(dict_size)
    encode_dict_size_to_byte(dict_size)
  end

  # Encode dictionary size to property byte
  #
  # Picks the smallest property byte (0..40) whose decoded size is at
  # least dict_size; sizes larger than anything representable map to 40.
  #
  # @param dict_size [Integer] Dictionary size
  # @return [Integer] Property byte value
  def self.encode_dict_size_to_byte(dict_size)
    (0..40).find { |prop| decode_dict_size(prop) >= dict_size } || 40
  end

  # Validate dictionary size for property byte encoding
  #
  # @param size [Integer] Dictionary size to validate
  # @return [void]
  # @raise [ArgumentError] If size is invalid
  def self.validate_prop_byte_range(size)
    return if size.between?(DICT_SIZE_MIN, DICT_SIZE_MAX)

    raise ArgumentError,
          "Dictionary size must be between #{DICT_SIZE_MIN} " \
          "and #{DICT_SIZE_MAX}"
  end

  # Validate property byte
  #
  # Defined with the other class methods: a bare `private` does not
  # apply to `def self.` methods, so this has always been public.
  #
  # @param prop [Integer] Property byte to validate
  # @return [void]
  # @raise [ArgumentError] If property byte is invalid
  def self.validate_prop_byte(prop)
    return if prop.between?(0, 40)

    raise ArgumentError,
          "Property byte must be between 0 and 40"
  end

  # Get the actual dictionary size (may differ from requested)
  #
  # @return [Integer] Size the stored property byte decodes to
  def actual_dict_size
    self.class.decode_dict_size(@prop_byte)
  end

  private

  # Validate dictionary size
  #
  # The upper bound is the smaller of DICT_SIZE_MAX and (1 << 31) - 1,
  # a signed 32-bit limit kept below what the format itself allows.
  #
  # @param size [Integer] Dictionary size to validate
  # @return [Integer] Validated size
  # @raise [ArgumentError] If size is invalid
  def validate_dict_size(size)
    max_size = [DICT_SIZE_MAX, (1 << 31) - 1].min
    return size if size.between?(DICT_SIZE_MIN, max_size)

    raise ArgumentError,
          "Dictionary size must be between #{DICT_SIZE_MIN} " \
          "and #{max_size}"
  end
end
177
+ end
178
+ end
179
+ end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require "stringio"
24
+ require_relative "../lzma"
25
+ require_relative "constants"
26
+ require_relative "lzma2_chunk"
27
+ require_relative "properties"
28
+ require_relative "../../implementations/xz_utils/lzma2/encoder"
29
+
30
+ module Omnizip
31
+ module Algorithms
32
+ class LZMA2 < Algorithm
33
# Simple LZMA2 encoder.
#
# Delegates the actual LZMA2 encoding to
# Omnizip::Implementations::XZUtils::LZMA2::Encoder and optionally
# prefixes the result with the LZMA2 dictionary-size property byte.
#
# For 7-Zip format compatibility, pass standalone: false to produce
# LZMA2 chunks without the leading property byte (raw mode).
class SimpleLZMA2Encoder
  # Maximum uncompressed size per LZMA2 chunk (2MB)
  UNCOMPRESSED_MAX = 1 << 21

  # Initialize the encoder
  # @param dict_size [Integer] Dictionary size (default: 8MB)
  # @param lc [Integer] Literal context bits (default: 3)
  # @param lp [Integer] Literal position bits (default: 0)
  # @param pb [Integer] Position bits (default: 2)
  # @param standalone [Boolean] If true, write property byte at start
  def initialize(
    dict_size: 8 * 1024 * 1024,
    lc: 3,
    lp: 0,
    pb: 2,
    standalone: true
  )
    @dict_size = dict_size
    @lc = lc
    @lp = lp
    @pb = pb
    @standalone = standalone
  end

  # Encode data into LZMA2 format
  # @param input_data [String] Input data to compress
  # @return [String] LZMA2 compressed data, preceded by the property
  #   byte when the encoder was created with standalone: true
  def encode(input_data)
    output = StringIO.new
    output.set_encoding(Encoding::BINARY)

    # LZMA2 property byte encodes dictionary size
    output.putc(encode_dict_size(@dict_size)) if @standalone

    # The inner encoder gets standalone: false because the property byte
    # (if any) has already been written above.
    encoder = Omnizip::Implementations::XZUtils::LZMA2::Encoder.new(
      dict_size: @dict_size,
      lc: @lc,
      lp: @lp,
      pb: @pb,
      standalone: false,
    )

    output.write(encoder.encode(input_data))
    output.string
  end

  private

  # Encode dictionary size to LZMA2 property byte
  #
  # Returns the smallest property byte whose decoded dictionary size,
  # (2 | (p & 1)) << (p / 2 + 11), is at least the requested size, so the
  # declared dictionary is never smaller than the one the encoder uses.
  # Sizes below LZMA2Constants::DICT_SIZE_MIN are raised to that minimum;
  # sizes beyond the largest encodable value map to 40.
  #
  # @param dict_size [Integer] Dictionary size
  # @return [Integer] Property byte (0-40)
  def encode_dict_size(dict_size)
    wanted = [dict_size, LZMA2Constants::DICT_SIZE_MIN].max

    (0..40).find do |prop|
      ((2 | (prop & 1)) << ((prop / 2) + 11)) >= wanted
    end || 40
  end
end
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "../lzma/xz_encoder"
24
+
25
+ module Omnizip
26
+ module Algorithms
27
# Adapter for XZ Encoder to work with LZMA2 chunking
#
# Wraps LZMA::XzEncoder and exposes the values LZMA2 chunking needs:
# the encoded bytes of a chunk, the LZMA properties byte and the
# dictionary size.
class LZMA2XzEncoderAdapter
  # Initialize XZ encoder adapter
  #
  # The full options hash is forwarded unchanged to LZMA::XzEncoder;
  # only the keys below are read by the adapter itself.
  #
  # @param options [Hash] Encoding options
  # @option options [Integer] :lc Literal context bits (default 3)
  # @option options [Integer] :lp Literal position bits (default 0)
  # @option options [Integer] :pb Position bits (default 2)
  # @option options [Integer] :dict_size Dictionary size (default 8MB)
  def initialize(options = {})
    @options = options
    @lc = options[:lc] || 3
    @lp = options[:lp] || 0
    @pb = options[:pb] || 2
  end

  # Encode data chunk
  #
  # @param data [String] Input data to encode
  # @param _limit [Integer, nil] Accepted for interface compatibility;
  #   currently not applied
  # @return [String] Encoded data (binary encoding)
  def encode_chunk(data, _limit = nil)
    output = StringIO.new
    # Compressed bytes are not text: keep the buffer binary so the result
    # can be concatenated with packed headers without encoding clashes.
    output.set_encoding(Encoding::BINARY)

    LZMA::XzEncoder.new(@options).encode(data, output)
    output.string
  end

  # Get LZMA properties byte
  #
  # Encodes lc, lp, pb into single byte using formula:
  #   (pb * 5 + lp) * 9 + lc
  #
  # @return [Integer] Properties byte
  def properties
    (((@pb * 5) + @lp) * 9) + @lc
  end

  # Get dictionary size
  #
  # @return [Integer] Dictionary size in bytes (8MB when not configured)
  def dict_size
    @options[:dict_size] || (1 << 23)
  end
end
81
+
82
+ # Backward compatibility alias
83
+ XzEncoderAdapter = LZMA2XzEncoderAdapter unless defined?(XzEncoderAdapter)
84
+ end
85
+ end
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "../algorithm"
24
+ require_relative "../models/algorithm_metadata"
25
+
26
+ module Omnizip
27
+ module Algorithms
28
+ # LZMA2 compression algorithm
29
+ # Improved version of LZMA with chunked format for better streaming
30
+ class LZMA2 < Algorithm
31
+ end
32
+ end
33
+ end
34
+
35
+ # Now require the nested classes that will reopen LZMA2
36
+ require_relative "lzma2/constants"
37
+ require_relative "lzma2/properties"
38
+ require_relative "lzma2/lzma2_chunk"
39
+ require_relative "lzma2/encoder"
40
+ require_relative "../implementations/xz_utils/lzma2/decoder"
41
+ require_relative "../implementations/xz_utils/lzma2/encoder"
42
+ require_relative "../implementations/seven_zip/lzma2/encoder"
43
+ require_relative "lzma2/xz_encoder_adapter"
44
+
45
+ module Omnizip
46
+ module Algorithms
47
# LZMA2 compression algorithm: registry metadata plus the
# compress/decompress entry points.
class LZMA2 < Algorithm
  class << self
    # Get algorithm metadata
    #
    # @return [Models::AlgorithmMetadata] Algorithm metadata
    def metadata
      Models::AlgorithmMetadata.new.tap do |meta|
        meta.name = "lzma2"
        meta.description = "LZMA2 compression with improved chunking format for better streaming"
        meta.version = "1.0.0"
        meta.supports_streaming = true
      end
    end
  end

  # @param options [Hash]
  # @option options [Integer] :dict_size Dictionary size (default 8 MB)
  # @option options [Integer] :lc Literal context bits (default 3)
  # @option options [Integer] :lp Literal position bits (default 0)
  # @option options [Integer] :pb Position bits (default 2)
  # @option options [Integer] :level Stored only; not read in this class
  # @option options [Boolean] :raw_mode For 7-Zip format (no property byte)
  def initialize(options = {})
    super()
    @dict_size = options[:dict_size] || (8 * 1024 * 1024) # 8 MB default
    @lc = options[:lc] || 3
    @lp = options[:lp] || 0
    @pb = options[:pb] || 2
    @level = options[:level] || 6
    @raw_mode = options[:raw_mode] # For 7-Zip format (no property byte)
  end

  # Compress data using LZMA2
  #
  # The leading property byte is written only for standalone streams.
  # An explicit :standalone option wins; otherwise the stream is
  # standalone unless raw mode was requested (per call or at
  # construction), which matches how #decompress resolves raw mode.
  #
  # @param input [#read, String] Data to compress
  # @param output [#write, String] Destination; a String is replaced
  # @param options [Hash]
  # @option options [Boolean] :standalone Write the property byte
  # @option options [Boolean] :raw_mode Omit the property byte
  # @return [Object] Result of output.write / output.replace
  def compress(input, output, options = {})
    raw_mode = options.fetch(:raw_mode, @raw_mode || false)
    standalone = options.fetch(:standalone, !raw_mode)

    encoder = LZMA2Encoder.new(
      dict_size: @dict_size,
      lc: @lc,
      lp: @lp,
      pb: @pb,
      standalone: standalone, # Write property byte only for standalone files
    )

    input_data = input.respond_to?(:read) ? input.read : input
    compressed = encoder.encode(input_data)

    if output.respond_to?(:write)
      output.write(compressed)
    else
      output.replace(compressed)
    end
  end

  # Decompress LZMA2 data
  #
  # raw_mode selects where the dictionary size comes from:
  # - For 7-Zip format: raw_mode=true, dict_size from coder properties
  # - For standalone LZMA2 files: raw_mode=false, dict_size from property byte
  #
  # @param input [#read, String] Compressed data
  # @param output [#write, String] Destination; a String is replaced
  # @param options [Hash]
  # @option options [Boolean] :raw_mode Overrides the constructor setting
  # @option options [Integer] :dict_size Overrides the constructor setting
  # @return [Object] Result of output.write / output.replace
  def decompress(input, output, options = {})
    input_data = input.respond_to?(:read) ? input.read : input
    input_stream = StringIO.new(input_data)
    input_stream.set_encoding(Encoding::BINARY)

    raw_mode = options.fetch(:raw_mode, @raw_mode || false)
    dict_size = options.fetch(:dict_size, @dict_size)

    # Create decoder using XZ Utils implementation
    decoder = Omnizip::Implementations::XZUtils::LZMA2::Decoder.new(
      input_stream,
      raw_mode: raw_mode,
      dict_size: dict_size,
    )

    decompressed = decoder.decode_stream

    if output.respond_to?(:write)
      output.write(decompressed)
    else
      output.replace(decompressed)
    end
  end

  # Encode dictionary size as single byte for LZMA2 properties
  #
  # @param dict_size [Integer] Dictionary size in bytes
  # @return [Integer] Value returned by LZMA2Encoder.encode_dict_size
  def self.encode_dict_size(dict_size)
    LZMA2Encoder.encode_dict_size(dict_size)
  end
end
137
+ end
138
+ end
139
+
140
+ # Auto-register LZMA2 in algorithm registry
141
+ Omnizip::Algorithms::LZMA2.register_algorithm