omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,754 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require "stringio"
24
+ require_relative "constants"
25
+ require_relative "block_header_parser"
26
+ require_relative "../../checksums/verifier"
27
+ # Load BCJ filters for filter chain decoding
28
+ require_relative "../../filters/bcj_x86"
29
+ require_relative "../../filters/bcj_arm"
30
+ require_relative "../../filters/bcj_ppc"
31
+ require_relative "../../filters/bcj_ia64"
32
+ require_relative "../../filters/bcj_sparc"
33
+ require_relative "../../filters/delta"
34
+ # LZMA2::Decoder is loaded by the main omnizip library via lzma2.rb
35
+
36
+ module Omnizip
37
+ module Formats
38
+ module XzFormat
39
+ # XZ Block decoder
40
+ #
41
+ # Decodes a single XZ block which consists of:
42
+ # - Block Header
43
+ # - Compressed Data
44
+ # - Block Padding (to 4-byte boundary)
45
+ # - Check (CRC32/CRC64/SHA256)
46
+ #
47
+ # Reference: /tmp/xz-source/src/liblzma/common/block_decoder.c
48
+ class BlockDecoder
49
+ # Filter IDs
50
+ FILTER_LZMA2 = 0x21
51
+
52
+ # Accessor for new input after block (used by stream decoder for multi-block files)
53
+ attr_reader :new_input_after_block
54
+ # Accessor for block size information (used for index validation)
55
+ attr_reader :unpadded_size, :uncompressed_size
56
+
57
# Wraps a readable stream and counts how many bytes have been pulled
# through #read and #getbyte.
#
# The count lets the block decoder learn how much input the LZMA2
# decoder consumed when the block header carries no compressed size.
class CountingInputStream
  # @return [Integer] total bytes returned by #read and #getbyte so far
  attr_reader :bytes_read

  # @param stream [#read, #getbyte] underlying stream to delegate to
  def initialize(stream)
    @stream = stream
    @bytes_read = 0
  end

  # Read from the underlying stream and add the returned size to the count.
  #
  # @param length [Integer, nil] maximum number of bytes (nil = to the end)
  # @param outbuf [String, nil] optional buffer forwarded to the stream
  # @return [String, nil] whatever the underlying stream returned
  def read(length = nil, outbuf = nil)
    chunk = @stream.read(length, outbuf)
    @bytes_read += chunk.bytesize if chunk
    chunk
  end

  # Read one byte; the count only advances when a byte was returned.
  #
  # @return [Integer, nil] the byte, or nil when the stream returned none
  def getbyte
    byte = @stream.getbyte
    @bytes_read += 1 if byte
    byte
  end

  # Whether the underlying stream is exhausted.
  #
  # IO and StringIO expose this as #eof?, not #eos?, so delegate to #eos?
  # only when the wrapped object provides it and use #eof? otherwise.
  #
  # @return [Boolean]
  def eos?
    return @stream.eos? if @stream.respond_to?(:eos?)

    @stream.eof?
  end
  alias eof? eos?

  # Forward an encoding change when the wrapped stream supports it.
  #
  # @param enc [Encoding] encoding to apply
  def set_encoding(enc)
    @stream.set_encoding(enc) if @stream.respond_to?(:set_encoding)
  end
end
89
+
90
# Set up a decoder for one block.
#
# @param input [IO] Input stream positioned at the block header
# @param check_type [Integer] Check type (0=None, 1=CRC32, 4=CRC64, 10=SHA256)
def initialize(input, check_type)
  @input, @check_type = input, check_type

  # Replacement input holding the bytes after this block; only set by
  # #decode when the whole remainder of the input had to be read.
  @new_input_after_block = nil
  # True once LZMA2 was decoded while locating the block boundary.
  @data_already_decompressed = false
  # Sizes recorded by #decode for index validation.
  @unpadded_size = nil
  @uncompressed_size = nil
end
102
+
103
# Decode one block: header, compressed data, padding and check.
#
# Two paths exist, selected by whether the block header carries a
# compressed size:
# - absent: the rest of the input is read, LZMA2 is decoded while counting
#   consumed bytes, and the bytes after the block are exposed through
#   #new_input_after_block;
# - present: exactly that many bytes (plus padding and check) are read
#   from the input, which is left positioned after the block.
#
# On success #unpadded_size and #uncompressed_size are set.
#
# @return [String] the block's uncompressed data
# @raise [Omnizip::IOError] if the input ends inside data, padding or check
#   (explicit compressed size path)
# @raise [Omnizip::FormatError] if padding is truncated/non-zero or the
#   check does not fit in the remaining input
# @raise [Omnizip::DecompressionError] if the uncompressed size differs
#   from the one declared in the header
# @raise [Omnizip::ChecksumError] if the check does not match
def decode
  header = BlockHeaderParser.parse(@input)
  filters = header[:filters]
  compressed_size = header[:compressed_size]
  check_size = Checksums::Verifier.check_size(@check_type)

  if ENV["XZ_BLOCK_DEBUG"]
    warn "DEBUG: decode - compressed_size=#{compressed_size.inspect}, check_type=#{@check_type}"
    warn "DEBUG: @input.class=#{@input.class}, @input.respond_to?(:pos)=#{@input.respond_to?(:pos)}"
    pos = @input.respond_to?(:pos) ? @input.pos : "N/A"
    warn "DEBUG: @input.pos=#{pos}"
  end

  if compressed_size.nil?
    # No compressed size in the header: read everything that is left and
    # let the LZMA2 decoder tell us where the compressed data ends.
    all_remaining = @input.read

    lzma2_output, consumed_bytes = decode_lzma2_with_consumption_tracking(
      all_remaining: all_remaining,
      filters: filters,
    )
    @data_already_decompressed = true

    # Block layout: [compressed data] [padding to 4-byte boundary] [check]
    padding_needed = (4 - (consumed_bytes % 4)) % 4
    check_start_pos = consumed_bytes + padding_needed

    if padding_needed.positive?
      padding_bytes = all_remaining.byteslice(consumed_bytes,
                                              padding_needed)
      if padding_bytes.nil? || padding_bytes.bytesize < padding_needed
        raise Omnizip::FormatError,
              "Unexpected end of stream in block padding"
      end
      unless padding_bytes.bytes.all?(0)
        raise Omnizip::FormatError,
              "Block padding contains non-zero bytes"
      end
    end

    if ENV["XZ_BLOCK_DEBUG"]
      warn "DEBUG: consumed_bytes=#{consumed_bytes}, padding_needed=#{padding_needed}, check_start_pos=#{check_start_pos}"
      warn "DEBUG: all_remaining.bytesize=#{all_remaining.bytesize}"
    end

    if check_start_pos + check_size > all_remaining.bytesize
      raise Omnizip::FormatError,
            "Invalid check position"
    end

    check_bytes = all_remaining.byteslice(check_start_pos, check_size)

    # Hand the bytes that follow this block to the stream decoder. All
    # offsets here are byte offsets, so slice by bytes (String#[] would
    # index by character on a non-binary string).
    total_block_size = check_start_pos + check_size
    data_after_block = all_remaining.byteslice(
      total_block_size, all_remaining.bytesize - total_block_size
    )
    new_input = StringIO.new(data_after_block)
    new_input.set_encoding(Encoding::BINARY)
    @new_input_after_block = new_input

    # LZMA2 is already undone; apply the other filters of the chain in
    # reverse order.
    uncompressed_data = lzma2_output
    filters.reject { |f| f[:id] == FILTER_LZMA2 }.reverse_each do |filter|
      uncompressed_data = decode_single_filter(uncompressed_data, filter)
    end
  else
    compressed_data = @input.read(compressed_size)
    if compressed_data.nil? || compressed_data.bytesize < compressed_size
      raise Omnizip::IOError,
            "Unexpected end of stream in compressed data: expected #{compressed_size} bytes"
    end

    # The block header is 4-byte aligned, so only the data needs padding.
    padding_needed = (4 - (compressed_size % 4)) % 4
    if padding_needed.positive?
      padding = @input.read(padding_needed)
      if padding.nil? || padding.bytesize < padding_needed
        raise Omnizip::IOError,
              "Unexpected end of stream in block padding"
      end
      unless padding.bytes.all?(0)
        raise Omnizip::FormatError,
              "Block padding contains non-zero bytes"
      end
    end

    if check_size.positive?
      check_bytes = @input.read(check_size)
      if check_bytes.nil? || check_bytes.bytesize < check_size
        raise Omnizip::IOError,
              "Unexpected end of stream in block check"
      end
    else
      check_bytes = ""
    end

    # The input is now positioned at the start of whatever follows the
    # block, so no replacement input is needed.
    uncompressed_data = decode_filters(compressed_data, filters)
  end

  # Verify uncompressed size matches header (if present)
  if header[:uncompressed_size] && (uncompressed_data.bytesize != header[:uncompressed_size])
    raise Omnizip::DecompressionError,
          "Uncompressed size mismatch: header says #{header[:uncompressed_size]}, got #{uncompressed_data.bytesize}"
  end

  if ENV["DEBUG_CHECKSUM"]
    # Diagnostics go to stderr so they never mix with decoded output.
    tail = uncompressed_data.byteslice(-50, 50) || uncompressed_data
    warn "DEBUG: uncompressed_data.bytesize=#{uncompressed_data.bytesize}"
    warn "DEBUG: first 100 bytes: #{uncompressed_data.byteslice(0, 100).inspect}"
    warn "DEBUG: last 50 bytes: #{tail.inspect}"
  end

  unless Checksums::Verifier.verify(uncompressed_data, check_bytes,
                                    @check_type)
    raise Omnizip::ChecksumError,
          "Block checksum mismatch for check type #{@check_type}"
  end

  # Record sizes for index validation. Unpadded size covers block header,
  # compressed data and check; block padding is not included.
  @uncompressed_size = uncompressed_data.bytesize
  header_size = header[:header_size] || 0
  actual_compressed_size = compressed_size.nil? ? consumed_bytes : compressed_size
  @unpadded_size = header_size + actual_compressed_size + check_size

  uncompressed_data
end
290
+
291
+ private
292
+
293
# Undo a filter chain.
#
# Filters are undone from the last entry of the chain to the first, each
# step feeding its output to the next. An empty chain returns the input
# unchanged.
#
# @param compressed_data [String] Compressed data
# @param filters [Array<Hash>] Filter definitions
# @return [String] Decompressed data
# @raise [Omnizip::FormatError] If a filter in the chain is unsupported
def decode_filters(compressed_data, filters)
  filters.reverse.inject(compressed_data) do |payload, filter|
    decode_single_filter(payload, filter)
  end
end
317
+
318
# Undo one filter of the chain, dispatching on its filter ID.
#
# @param compressed_data [String] Data as produced by the filter
# @param filter [Hash] Filter definition with :id and :properties
# @return [String] Data with the filter undone
# @raise [Omnizip::FormatError] If the filter ID is not handled here
def decode_single_filter(compressed_data, filter)
  filter_id = filter[:id]
  props = filter[:properties]

  # BCJ filters that share the generic decode_bcj path.
  bcj_architecture = {
    0x04 => :x86,
    0x05 => :powerpc,
    0x06 => :ia64,
    0x07 => :arm,
    0x08 => :armthumb,
    0x09 => :sparc,
  }[filter_id]
  return decode_bcj(compressed_data, bcj_architecture, props) if bcj_architecture

  return decode_lzma2(compressed_data, props) if filter_id == FILTER_LZMA2
  return decode_delta(compressed_data, props) if filter_id == 0x03
  return decode_bcj_arm64(compressed_data, props) if filter_id == 0x0A

  raise Omnizip::FormatError,
        "Unsupported filter ID: 0x#{filter_id.to_s(16).upcase}"
end
348
+
349
# Undo the Delta filter.
#
# The first properties byte stores the distance minus one; missing or
# empty properties mean a distance of 1.
#
# @param data [String] Input data
# @param properties [String, nil] Filter properties (first byte is distance - 1)
# @return [String] Delta-transformed data
def decode_delta(data, properties)
  stored = properties ? properties.getbyte(0) : nil
  distance = stored.to_i + 1

  delta = Omnizip::Filters::Delta.new(distance)
  delta.decode(data, 0)
end
365
+
366
# Undo a BCJ (branch/call/jump) filter for the given architecture.
#
# When the filter properties are at least four bytes long, the first four
# bytes are the start offset, stored as an unsigned 32-bit little-endian
# integer (liblzma reads it with read32le); otherwise the offset is 0.
#
# @param data [String] Input data
# @param architecture [Symbol] Target architecture (:x86, :powerpc, :ia64,
#   :arm, :armthumb, :sparc or :arm64)
# @param properties [String, nil] Filter properties
# @return [String] BCJ-transformed data
# @raise [Omnizip::FormatError] If the architecture is not supported
def decode_bcj(data, architecture, properties)
  supported = %i[x86 powerpc ia64 arm armthumb sparc arm64]
  unless supported.include?(architecture)
    raise Omnizip::FormatError,
          "Unsupported BCJ architecture: #{architecture}"
  end

  start_offset = 0
  if properties && properties.bytesize >= 4
    # "V" = 32-bit unsigned, little-endian
    start_offset = properties.unpack1("V")
  end

  Omnizip::Filters::BCJ.new(architecture: architecture).decode(data, start_offset)
end
404
+
405
# Undo the ARM64 BCJ filter.
#
# Works on aligned 32-bit little-endian words (a trailing partial word is
# left untouched) and mirrors the decoder side of XZ Utils' arm64 filter:
# - BL (top six bits == 0x25): the 26-bit immediate has the word address
#   (pc >> 2) subtracted.
# - ADRP ((instr & 0x9F000000) == 0x90000000): the page immediate has the
#   page address (pc >> 12) subtracted, but only when the stored value
#   passes the range check below; other ADRP words are left unchanged.
#
# @param data [String] Input data (not modified; a binary copy is returned)
# @param properties [String, nil] Filter properties; when at least 4 bytes
#   long the first four are the little-endian start offset
# @return [String] ARM64 BCJ-transformed data
def decode_bcj_arm64(data, properties)
  start_offset = 0
  if properties && properties.bytesize >= 4
    # "V" = 32-bit unsigned, little-endian
    start_offset = properties.unpack1("V")
  end

  hex_dump = lambda do |bytes|
    bytes.unpack1("H*").scan(/../).each_slice(16).map { |row| row.join(" ") }.join("\n")
  end

  # Diagnostics go to stderr so they never mix with decoded output.
  if ENV["DEBUG_ARM64_BCJ"]
    warn "DEBUG ARM64 BCJ: start_offset=0x#{start_offset.to_s(16).upcase}"
    warn "DEBUG ARM64 BCJ: input (first 32 bytes):"
    warn hex_dump.call(data.byteslice(0, 32))
  end

  result = data.b
  size = data.bytesize & ~3 # Round down to multiple of 4

  (0...size).step(4) do |i|
    pc = (start_offset + i) & 0xFFFFFFFF
    instr = read_uint32_le(result, i)

    if (instr >> 26) == 0x25
      # BL: decoder uses the negated word address (0U - (pc >> 2)).
      src = instr
      negated_pc = (0 - (pc >> 2)) & 0xFFFFFFFF

      instr = 0x94000000 | ((src + negated_pc) & 0x03FFFFFF)
      write_uint32_le(result, i, instr)
    elsif (instr & 0x9F000000) == 0x90000000
      # ADRP: gather immlo (bits 29-30) and immhi (bits 5-23).
      src = ((instr >> 29) & 3) | ((instr >> 3) & 0x001FFFFC)

      # Skip values outside the range the encoder converts:
      # if ((src + 0x00020000) & 0x001C0000) continue;
      next if (src + 0x00020000).anybits?(0x001C0000)

      instr &= 0x9000001F

      # Decoder uses the negated page address (0U - (pc >> 12)).
      negated_pc = (0 - (pc >> 12)) & 0xFFFFFFFF
      dest = (src + negated_pc) & 0xFFFFFFFF

      instr |= (dest & 3) << 29
      instr |= (dest & 0x0003FFFC) << 3
      instr |= (0 - (dest & 0x00020000)) & 0x00E00000

      write_uint32_le(result, i, instr)
    end
  end

  if ENV["DEBUG_ARM64_BCJ"]
    warn "DEBUG ARM64 BCJ: output (first 32 bytes):"
    warn hex_dump.call(result.byteslice(0, 32))
  end

  result
end
488
+
489
# Assemble an unsigned 32-bit integer from four little-endian bytes.
#
# @param data [String] Binary data
# @param offset [Integer] Byte position of the least significant byte
# @return [Integer] Unsigned 32-bit integer
def read_uint32_le(data, offset)
  b0, b1, b2, b3 = data.byteslice(offset, 4).bytes
  b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
end
501
+
502
# Store the low 32 bits of an integer as four little-endian bytes.
#
# @param data [String] Binary data (modified in place)
# @param offset [Integer] Byte position of the least significant byte
# @param value [Integer] Integer to write; bits above 32 are discarded
# @return [void]
def write_uint32_le(data, offset, value)
  word = value & 0xFFFFFFFF
  written = nil
  4.times do |k|
    written = data.setbyte(offset + k, (word >> (8 * k)) & 0xFF)
  end
  written
end
515
+
516
# Decode LZMA2 data while tracking how many input bytes were consumed.
#
# Used when the block header does not state the compressed size: the
# LZMA2 decoder reads through a CountingInputStream, and the count marks
# where the compressed data ends. The decoded data is also kept in
# @decompressed_data.
#
# @param all_remaining [String] All remaining data after the block header
# @param filters [Array<Hash>] Filter definitions; the LZMA2 entry is
#   looked up by ID, wherever it sits in the chain
# @return [Array<String, Integer>] Decompressed data and bytes consumed
# @raise [Omnizip::FormatError] If the chain has no LZMA2 filter or its
#   dictionary-size byte is out of range
def decode_lzma2_with_consumption_tracking(all_remaining:, filters:)
  # Diagnostics go to stderr so they never mix with decoded output.
  if ENV["DEBUG_LZMA2_INPUT"]
    warn "DEBUG LZMA2 INPUT: first 30 bytes:"
    all_remaining.bytes.first(30).each_with_index.each_slice(4) do |group|
      warn group.map { |byte, i| format("  [%2d] 0x%02x (%3d)", i, byte, byte) }.join
    end
  end

  lzma2_filter = filters.find { |f| f[:id] == FILTER_LZMA2 }
  if lzma2_filter.nil?
    raise Omnizip::FormatError,
          "Unsupported filter chain: LZMA2 filter not found (not supported)"
  end

  # Dictionary size comes from the filter's property byte: sizes alternate
  # between 2^n and 3 * 2^(n-1). liblzma rejects values above 40; without
  # that check an arbitrary byte yields an absurdly large size.
  properties = lzma2_filter[:properties]
  dict_size = if properties && properties.bytesize.positive?
                prop = properties.getbyte(0)
                if prop > 40
                  raise Omnizip::FormatError,
                        "Invalid LZMA2 dictionary size property: 0x#{prop.to_s(16).upcase}"
                end
                (2 | (prop & 1)) << ((prop / 2) + 11)
              else
                8 * 1024 * 1024 # 8MB default
              end

  input_buffer = CountingInputStream.new(StringIO.new(all_remaining))
  input_buffer.set_encoding(Encoding::BINARY)

  # raw_mode: the XZ payload starts with LZMA2 control bytes, not a
  # property byte, so the dictionary size is injected afterwards.
  decoder = Omnizip::Implementations::XZUtils::LZMA2::Decoder.new(input_buffer,
                                                                  raw_mode: true)
  decoder.instance_variable_set(:@dict_size, dict_size)
  decoder.instance_variable_set(:@properties, Omnizip::Algorithms::LZMA2::Properties.new(dict_size))

  uncompressed_data = decoder.decode_stream

  # Kept for later filter-chain processing.
  @decompressed_data = uncompressed_data

  [uncompressed_data, input_buffer.bytes_read]
end
576
+
577
# Decode LZMA2 data of known extent.
#
# In XZ the LZMA2 payload starts directly with control bytes; the
# dictionary size lives in the filter's single property byte:
#   even byte: 2^(byte/2 + 12)       e.g. 0x08 -> 64 KiB
#   odd byte:  3 * 2^((byte-1)/2 + 11)
# liblzma rejects values above 40; without that check an arbitrary byte
# yields an absurdly large size. Missing properties fall back to 8 MiB.
#
# @param compressed_data [String] LZMA2 compressed data
# @param properties [String, nil] LZMA2 properties byte
# @return [String] Decompressed data
# @raise [Omnizip::FormatError] If the dictionary-size byte is out of range
def decode_lzma2(compressed_data, properties)
  dict_size = if properties && properties.bytesize.positive?
                prop = properties.getbyte(0)
                if prop > 40
                  raise Omnizip::FormatError,
                        "Invalid LZMA2 dictionary size property: 0x#{prop.to_s(16).upcase}"
                end
                (2 | (prop & 1)) << ((prop / 2) + 11)
              else
                8 * 1024 * 1024 # 8MB default
              end

  input_buffer = StringIO.new(compressed_data)
  input_buffer.set_encoding(Encoding::BINARY)

  # raw_mode: no property byte precedes the data, so the dictionary size
  # is injected into the decoder afterwards.
  decoder = Omnizip::Implementations::XZUtils::LZMA2::Decoder.new(input_buffer,
                                                                  raw_mode: true)
  decoder.instance_variable_set(:@dict_size, dict_size)
  decoder.instance_variable_set(:@properties, Omnizip::Algorithms::LZMA2::Properties.new(dict_size))

  decoder.decode_stream
end
625
+
626
# Find the end of LZMA2 compressed data by walking its chunk headers.
#
# LZMA2 chunk layout, selected by the control byte:
# - 0x00: end marker
# - 0x01, 0x02: uncompressed chunk
#   - data size minus one (2 bytes, big-endian)
#   - uncompressed data
# - 0x03-0x7F: invalid
# - 0x80-0xFF: LZMA chunk
#   - low 16 bits of the uncompressed size minus one (2 bytes)
#   - compressed size minus one (2 bytes, big-endian)
#   - properties (1 byte, only if control >= 0xC0)
#   - compressed LZMA data
#
# Every valid chunk states its own length, so the walk never has to guess
# at boundaries. Size bytes missing at the very end of the data are read
# as zero.
#
# @param data [String] LZMA2 data to parse
# @return [Integer] Position just past the end marker, or the position
#   reached when the data runs out before an end marker is seen
# @raise [Omnizip::FormatError] If a control byte in 0x03-0x7F is met
def find_lzma2_compressed_data_end(data)
  pos = 0
  total = data.bytesize

  while pos < total
    control = data.getbyte(pos)
    pos += 1

    case control
    when 0x00
      # End marker: the LZMA2 data ends right after it.
      return pos
    when 0x01, 0x02
      stored = ((data.getbyte(pos) || 0) << 8) | (data.getbyte(pos + 1) || 0)
      # two size bytes + (stored + 1) bytes of payload
      pos += 2 + stored + 1
    when 0x80..0xFF
      # Skip the 2-byte uncompressed size, then read the compressed size.
      stored = ((data.getbyte(pos + 2) || 0) << 8) | (data.getbyte(pos + 3) || 0)
      pos += 4
      pos += 1 if control >= 0xC0 # properties byte
      pos += stored + 1
    else
      raise Omnizip::FormatError,
            "Invalid LZMA2 control byte: 0x#{control.to_s(16).upcase}"
    end
  end

  # Ran out of data without seeing an end marker.
  pos
end
751
+ end
752
+ end
753
+ end
754
+ end