omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,210 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "constants"
24
+
25
+ module Omnizip
26
+ module Algorithms
27
+ class LZMA < Algorithm
28
+ # Literal byte encoder
29
+ #
30
+ # This class is responsible for encoding literal bytes using
31
+ # probability models. It supports two modes:
32
+ #
33
+ # 1. Unmatched mode: Simple 8-bit encoding
34
+ # 2. Matched mode: Uses match byte for context (SDK feature)
35
+ #
36
+ # The matched mode improves compression when a literal follows
37
+ # a match, by using the corresponding byte from the match as
38
+ # context for probability modeling.
39
+ #
40
+ # Single Responsibility: Literal byte encoding only
41
+ #
42
+ # @example Unmatched encoding
43
+ # encoder = LiteralEncoder.new
44
+ # encoder.encode_unmatched(byte, pos, prev_byte, lc, literal_mask, range_encoder, models)
45
+ #
46
+ # @example Matched encoding (SDK mode)
47
+ # encoder = LiteralEncoder.new
48
+ # encoder.encode_matched(byte, match_byte, pos, prev_byte, lc, literal_mask, range_encoder, models)
49
+ class LiteralEncoder
50
+ include Constants
51
+
52
# Build a literal encoder.
#
# The value is only stored on the instance; the encode methods shown in
# this class take their own +lc+ argument and do not read +@lc+.
#
# @param lc [Integer] Literal context bits (0-8); defaults to 3
def initialize(lc = 3)
  @lc = lc
end
59
+
60
# Encode one literal byte without match-byte context.
#
# The eight bits of +byte+ are coded most-significant first. Each bit is
# coded with the model selected by the bits already emitted: a binary-tree
# walk whose node index starts at 1 and becomes (node << 1) + bit.
#
# The models for this literal start at
#   3 * ((((pos << 8) + prev_byte) & literal_mask) << lc)
# which mirrors the literal_subcoder macro from XZ Utils.
#
# @param byte [Integer] Byte value to encode (0-255)
# @param pos [Integer] Current position in stream
# @param prev_byte [Integer] Previous byte value
# @param lc [Integer] Literal context bits (0-8)
# @param literal_mask [Integer] Mask applied to the position/previous-byte context
# @param range_encoder [#encode_bit] Receives (model, bit) for every coded bit
# @param models [Array<BitModel>] Literal probability models
# @return [void]
def encode_unmatched(byte, pos, prev_byte, lc, literal_mask,
                     range_encoder, models)
  subcoder_base = 3 * ((((pos << 8) + prev_byte) & literal_mask) << lc)
  node = 1

  # Walk from bit 7 down to bit 0, always exactly eight bits.
  7.downto(0) do |shift|
    bit = (byte >> shift) & 1
    range_encoder.encode_bit(models[subcoder_base + node], bit)
    node = (node << 1) + bit
  end

  nil
end
100
+
101
# Encode one literal byte using a dictionary byte as extra context.
#
# Follows the shape of literal_matched() from XZ Utils' lzma_encoder.c:
#
# * +sym+ starts as the byte with a marker bit at 0x100 and is shifted
#   left once per coded bit; coding stops when it reaches 0x10000.
# * The match byte is shifted in lock-step; its bit at the 0x100 position
#   (+match_bit+) picks one of two model banks while +tracking+ is 0x100.
# * After each bit, +tracking+ is AND-ed with ~(match ^ sym). It therefore
#   drops to 0 as soon as a coded bit differs from the match bit, and the
#   remaining bits use the plain node index (sym >> 8).
#
# The model base offset is computed exactly as in the unmatched path:
#   3 * ((((pos << 8) + prev_byte) & literal_mask) << lc)
#
# @param byte [Integer] Byte value to encode (0-255)
# @param match_byte [Integer] Corresponding byte from dictionary
# @param pos [Integer] Current position in stream
# @param prev_byte [Integer] Previous byte value
# @param lc [Integer] Literal context bits (0-8)
# @param literal_mask [Integer] Mask applied to the position/previous-byte context
# @param range_encoder [#encode_bit] Receives (model, bit) for every coded bit
# @param models [Array<BitModel>] Literal probability models
# @return [void]
def encode_matched(byte, match_byte, pos, prev_byte, lc, literal_mask,
                   range_encoder, models)
  subcoder_base = 3 * ((((pos << 8) + prev_byte) & literal_mask) << lc)

  tracking = 0x100
  sym = byte + 0x100
  shifted_match = match_byte

  # Do-while shape: the body always runs before the termination check.
  loop do
    shifted_match <<= 1
    match_bit = shifted_match & tracking
    model_at = subcoder_base + tracking + match_bit + (sym >> 8)
    bit = (sym >> 7) & 1

    range_encoder.encode_bit(models[model_at], bit)

    sym <<= 1
    tracking &= ~(shifted_match ^ sym)

    break if sym >= 0x1_0000
  end
end
177
+
178
+ private
179
+
180
# Encode the remaining low bits of a literal in unmatched mode
#
# Continues a literal whose leading bits have already been coded.
# +symbol+ is the node index built so far: a leading 1 marker followed by
# the k bits already emitted, so its bit length is k + 1 and 8 - k bits
# of +byte+ are still to be coded.
#
# The next bit to code is bit (7 - k) of +byte+, i.e. the shift starts at
# remaining_bits - 1. The earlier version always started at bit 7, which
# re-coded the leading bits whenever symbol was not 1.
#
# No caller is visible in this class; the original comment describes it
# as the fallback once matched-mode bits diverge.
#
# @param byte [Integer] Original byte value
# @param symbol [Integer] Partial symbol value (1 marker + bits coded so far)
# @param base_offset [Integer] Model base offset
# @param range_encoder [#encode_bit] Receives (model, bit) for every coded bit
# @param models [Array<BitModel>] Literal probability models
# @return [void]
def encode_unmatched_tail(byte, symbol, base_offset, range_encoder,
                          models)
  remaining_bits = 8 - (symbol.bit_length - 1)

  remaining_bits.times do |i|
    # Resume from the first bit that has not been coded yet.
    bit = (byte >> (remaining_bits - 1 - i)) & 1

    range_encoder.encode_bit(models[base_offset + symbol], bit)
    symbol = (symbol << 1) | bit
  end
end
207
+ end
208
+ end
209
+ end
210
+ end
@@ -0,0 +1,341 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "xz_utils_decoder"
24
+ require_relative "../../checksums/crc32"
25
+ require "stringio"
26
+
27
+ module Omnizip
28
+ module Algorithms
29
+ class LZMA < Algorithm
30
+ #
31
+ # Decoder for .lz (lzip) format
32
+ #
33
+ # This is the lzip format, a DIFFERENT container format from both
34
+ # XZ and .lzma (LZMA_Alone). Lzip was created as an alternative to
35
+ # the legacy .lzma format with better integrity checking.
36
+ #
37
+ # File format:
38
+ # - Magic bytes: "LZIP" (0x4C 0x5A 0x49 0x50)
39
+ # - Version (1 byte): 0 or 1
40
+ # - Dictionary size (1 byte): encoded format
41
+ # - LZMA1 compressed stream (with fixed LC=3, LP=0, PB=2)
42
+ # - Footer:
43
+ # - Version 0 (12 bytes): CRC32 (4) + Uncompressed size (8)
44
+ # - Version 1 (20 bytes): CRC32 (4) + Uncompressed size (8) + Member size (8)
45
+ #
46
+ # Reference: XZ Utils source, src/liblzma/common/lzip_decoder.c
47
+ #
48
+ # This decoder uses the same LZMA1 decoding engine as XZ format,
49
+ # but with the lzip container format and CRC32 integrity checking.
50
+ #
51
+ # @example Decode .lz file
52
+ # data = File.binread("file.lz")
53
+ # decoder = Omnizip::Algorithms::LZMA::LzipDecoder.new(StringIO.new(data))
54
+ # result = decoder.decode_stream
55
+ #
56
+ class LzipDecoder
57
+ # Lzip magic bytes: "LZIP" in ASCII
58
+ # Reference: lzip_decoder.c:106
59
+ MAGIC = [0x4C, 0x5A, 0x49, 0x50].freeze
60
+
61
+ # Fixed LC/LP/PB values for lzip format
62
+ # Reference: lzip_decoder.c:23-26
63
+ LZIP_LC = 3
64
+ LZIP_LP = 0
65
+ LZIP_PB = 2
66
+
67
+ # Footer sizes
68
+ # Reference: lzip_decoder.c:19-21
69
+ LZIP_V0_FOOTER_SIZE = 12
70
+ LZIP_V1_FOOTER_SIZE = 20
71
+ LZIP_FOOTER_SIZE_MAX = LZIP_V1_FOOTER_SIZE
72
+
73
+ # Minimum and maximum dictionary sizes (in bytes)
74
+ # Reference: lzip_decoder.c:197-198
75
+ MIN_DICT_SIZE = 4096 # 4 KiB
76
+ MAX_DICT_SIZE = (512 << 20) # 512 MiB
77
+
78
# Set up a decoder over .lz (lzip) input and read the first member header.
#
# The header is parsed immediately, so an invalid or truncated header
# raises from the constructor.
#
# @param input [IO] Input stream of .lz compressed data
# @param options [Hash] Decoding options
# @option options [Boolean] :ignore_check If true, skip CRC32 verification (default: false)
# @option options [Boolean] :concatenated If true, keep decoding further
#   .lz members that follow the first one (default: true)
# @raise [Omnizip::DecompressionError] If header is invalid
# @raise [Omnizip::UnsupportedFormatError] If the member version is not supported
def initialize(input, options = {})
  @input = input
  @ignore_check = options.fetch(:ignore_check) { false }
  # Multi-member decoding is on unless the caller opts out.
  @concatenated = options.fetch(:concatenated) { true }

  parse_header

  # Magic (4) + version (1) + dictionary size (1) have been consumed.
  @member_size = 6
  @first_member = true

  # Running integrity state for the current member.
  @crc32 = 0
  @uncompressed_size = 0
end
105
+
106
# Decode the .lz stream, member by member.
#
# For each member an XzUtilsDecoder is created with the fixed lzip
# LC/LP/PB values and the dictionary size from the header. Its output is
# either written to +output+ or appended to an in-memory buffer. The
# footer is then verified, or just skipped when :ignore_check was given.
# When concatenated mode is on and another member follows, its header is
# parsed and the loop repeats.
#
# @param output [IO, nil] Optional output stream
# @return [String, Integer] Accumulated decompressed data when +output+
#   is nil, otherwise the total number of bytes written to +output+
def decode_stream(output = nil)
  collected = String.new(encoding: Encoding::BINARY)
  written = 0
  result = nil

  loop do
    # Position of the first compressed byte of this member.
    member_start = @input.pos

    # Size is passed as "unknown" so the end-of-payload marker ends the stream.
    lzma = XzUtilsDecoder.new(@input,
                              lzma2_mode: true,
                              lc: LZIP_LC,
                              lp: LZIP_LP,
                              pb: LZIP_PB,
                              dict_size: @dict_size,
                              uncompressed_size: 0xFFFFFFFFFFFFFFFF)
    chunk = lzma.decode_stream(nil, check_rc_finished: false)

    result =
      if output
        output.write(chunk)
        written += chunk.bytesize
      else
        collected << chunk
      end

    # Header (6 bytes) plus compressed payload; the footer is added below.
    @member_size = @input.pos - member_start + 6

    if @ignore_check
      # Consume the footer without inspecting it.
      skip = @version.zero? ? LZIP_V0_FOOTER_SIZE : LZIP_V1_FOOTER_SIZE
      @input.read(skip)
      @member_size += skip
    else
      payload = chunk || +''
      crc = Omnizip::Checksums::Crc32.calculate(payload)
      @uncompressed_size = payload.bytesize

      # Reads the footer and adds its size to @member_size.
      verify_footer(crc)
    end

    break unless @concatenated && has_next_member?

    parse_header
  end

  result
end
175
+
176
+ private
177
+
178
# Check if there's another concatenated LZIP member
#
# Reads up to four bytes, compares them with the LZIP magic and then
# always seeks back over whatever was read. The stream position is left
# unchanged whether or not a member follows. Previously the bytes were
# restored only on a match, so trailing non-LZIP data or a short read
# stayed consumed.
#
# Requires a seekable input, as before.
#
# @return [Boolean] true if another member is present
def has_next_member?
  magic_bytes = @input.read(4)
  # nil means end of input: nothing was read, nothing to rewind.
  return false if magic_bytes.nil?

  # Undo the read so this really is a peek (short reads included).
  @input.seek(-magic_bytes.bytesize, ::IO::SEEK_CUR)

  magic_bytes.bytes == MAGIC
end
198
+
199
# Read and validate the six-byte .lz member header.
#
# Layout (as in XZ Utils' lzip_decoder.c):
# - Magic bytes: "LZIP" (4 bytes)
# - Version (1 byte): 0 or 1
# - Dictionary size (1 byte): low five bits are the base-2 logarithm,
#   high three bits are a fraction (0/16 to 7/16) subtracted from it
#
# On success sets @version and @dict_size.
#
# @return [void]
# @raise [Omnizip::DecompressionError] If the header is truncated, the
#   magic does not match, or the dictionary size is out of range
# @raise [Omnizip::UnsupportedFormatError] If the version is above 1
def parse_header
  # --- magic ---
  signature = @input.read(4)
  if signature.nil? || signature.bytesize < 4
    raise Omnizip::DecompressionError, "Incomplete .lz header: missing magic bytes"
  end

  unless signature.bytes == MAGIC
    raise Omnizip::DecompressionError, "Invalid .lz header: magic bytes don't match LZIP (expected #{MAGIC.map { |b| "0x#{b.to_s(16).upcase}" }.join(' ')}, got #{signature.bytes.map { |b| "0x#{b.to_s(16).upcase}" }.join(' ')})"
  end

  # --- version ---
  version = @input.getbyte
  raise Omnizip::DecompressionError, "Incomplete .lz header: missing version byte" if version.nil?

  @version = version
  unless @version <= 1
    raise Omnizip::UnsupportedFormatError, "Unsupported .lz version: #{@version} (only 0 and 1 are supported)"
  end

  # --- dictionary size ---
  size_code = @input.getbyte
  raise Omnizip::DecompressionError, "Incomplete .lz header: missing dictionary size byte" if size_code.nil?

  exponent = size_code & 0x1F
  sixteenths = size_code >> 5

  # Valid range is [4 KiB, 512 MiB]; at exponent 12 no fraction may be removed.
  in_range = exponent.between?(12, 29) && !(exponent == 12 && sixteenths.positive?)
  unless in_range
    raise Omnizip::DecompressionError, "Invalid .lz header: dictionary size byte 0x#{size_code.to_s(16).upcase} is out of valid range"
  end

  # 2^exponent - sixteenths * 2^(exponent - 4)
  @dict_size = (1 << exponent) - (sixteenths << (exponent - 4))

  # Defensive bounds checks on the computed size.
  raise Omnizip::DecompressionError, "Dictionary size calculation error: too small" if @dict_size < MIN_DICT_SIZE
  raise Omnizip::DecompressionError, "Dictionary size calculation error: too large" if @dict_size > MAX_DICT_SIZE
end
261
+
262
# Read the member footer and check it against what was decoded.
#
# Footer layout, all fields little-endian:
# - CRC32 of uncompressed data (4 bytes)
# - Uncompressed size (8 bytes)
# - Member size (8 bytes), present only when the version is above 0
#
# The footer length is added to @member_size before any comparison, so
# the member-size check covers header, payload and footer.
#
# @param calculated_crc [Integer] Calculated CRC32 of uncompressed data
# @return [void]
# @raise [Omnizip::DecompressionError] If the footer is truncated
# @raise [Omnizip::ChecksumError] If the CRC32, uncompressed size or
#   member size does not match the stored value
def verify_footer(calculated_crc)
  footer_size = @version.zero? ? LZIP_V0_FOOTER_SIZE : LZIP_V1_FOOTER_SIZE
  footer = @input.read(footer_size)
  if footer.nil? || footer.bytesize < footer_size
    raise Omnizip::DecompressionError, "Incomplete .lz footer: expected #{footer_size} bytes, got #{footer&.bytesize || 0}"
  end

  @member_size += footer_size

  # "V" = 32-bit LE unsigned, "Q<" = 64-bit LE unsigned.
  # The third field comes back nil for a 12-byte (version 0) footer.
  stored_crc, stored_uncompressed_size, stored_member_size = footer.unpack("VQ<Q<")

  unless calculated_crc == stored_crc
    raise Omnizip::ChecksumError, "CRC32 mismatch: calculated 0x#{calculated_crc.to_s(16).upcase}, stored 0x#{stored_crc.to_s(16).upcase}"
  end

  unless @uncompressed_size == stored_uncompressed_size
    raise Omnizip::ChecksumError, "Uncompressed size mismatch: decoded #{@uncompressed_size}, stored #{stored_uncompressed_size}"
  end

  # Only footers with the extra field carry a member size.
  return unless @version.positive?

  unless @member_size == stored_member_size
    raise Omnizip::ChecksumError, "Member size mismatch: decoded #{@member_size}, stored #{stored_member_size}"
  end
end
312
+
313
# Thin wrapper around an input object that counts the bytes handed out
# through #read and #getbyte. Only those two calls and #eof? are forwarded.
class TrackingInputStream
  # @return [Integer] start offset plus the number of bytes read so far
  attr_reader :bytes_read

  # @param input [#read, #getbyte, #eof?] Underlying stream
  # @param start_offset [Integer] Initial value of the byte counter
  def initialize(input, start_offset = 0)
    @input = input
    @bytes_read = start_offset
  end

  # Read from the underlying stream and count what came back.
  #
  # @param size [Integer, nil] Passed through to the underlying #read
  # @return [String, nil] Whatever the underlying #read returned
  def read(size = nil)
    @input.read(size).tap do |chunk|
      @bytes_read += chunk.bytesize if chunk
    end
  end

  # Read one byte from the underlying stream and count it when present.
  #
  # @return [Integer, nil] Whatever the underlying #getbyte returned
  def getbyte
    @input.getbyte.tap do |value|
      @bytes_read += 1 if value
    end
  end

  # @return [Boolean] Whether the underlying stream reports end of input
  def eof?
    @input.eof?
  end
end
338
+ end
339
+ end
340
+ end
341
+ end