omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,632 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+ require_relative "header"
5
+ require_relative "parser"
6
+ require_relative "models/stream_info"
7
+ require_relative "models/file_entry"
8
+ require_relative "stream_decompressor"
9
+ require_relative "../../models/split_options"
10
+ require "fileutils"
11
+
12
+ module Omnizip
13
+ module Formats
14
+ module SevenZip
15
+ # Split archive reader for .7z format
16
+ # Reads multi-volume archives
17
+ class SplitArchiveReader
18
+ include Constants
19
+
20
+ attr_reader :base_path, :header, :entries, :stream_info, :volumes
21
+
22
# Build a reader for a (possibly split) .7z archive.
# No file is touched here; volumes are located and opened by #open.
#
# @param base_path [String] Path to first volume (e.g., "backup.7z.001")
def initialize(base_path)
  @volume_handles = []
  @volumes = []
  @entries = []
  @stream_info = nil
  @base_path = base_path
end
32
+
33
# Detect and open all volumes, then parse the archive structure.
#
# If any step fails, the volume handles opened so far are closed before
# the error propagates, so a failed #open does not leak file descriptors.
#
# @return [SplitArchiveReader] self, to allow chaining
# @raise [RuntimeError] if files cannot be opened or parsed
def open
  detect_volumes
  open_volumes
  parse_archive
  self
rescue StandardError
  # Release whatever was opened before the failure, then re-raise as-is
  close
  raise
end
42
+
43
# Tell whether more than one volume was detected for this archive.
#
# @return [Boolean] true if split across multiple volumes
def split?
  @volumes.length > 1
end
49
+
50
# Count the volumes detected for this archive.
#
# @return [Integer] Number of volumes
def total_volumes
  @volumes.length
end
56
+
57
# Report the on-disk size of the first volume.
#
# @return [Integer] Volume size in bytes, or 0 when no volumes are known
def volume_size
  @volumes.empty? ? 0 : File.size(@volumes.first)
end
65
+
66
# Return the entries parsed from the archive header.
# The list is empty until #open has parsed the archive.
#
# @return [Array<Models::FileEntry>] File entries
def list_files
  @entries
end
72
+
73
# Write a single archive entry to +output_path+.
#
# Directory entries are created as directories, entries with a data stream
# are decompressed and written in binary mode, and entries without a stream
# become empty files.
#
# @param entry_name [String] File name to extract
# @param output_path [String] Destination path
# @raise [RuntimeError] if entry not found or extraction fails
def extract_entry(entry_name, output_path)
  target = @entries.find { |candidate| candidate.name == entry_name }
  raise "Entry not found: #{entry_name}" if target.nil?

  # The parent directory must exist before anything is written
  FileUtils.mkdir_p(File.dirname(output_path))

  if target.directory?
    FileUtils.mkdir_p(output_path)
  elsif target.has_stream?
    File.binwrite(output_path, extract_entry_data(target))

    # Restore timestamps only when the entry carries an mtime;
    # atime falls back to mtime when the archive does not store it
    modified = target.mtime
    File.utime(target.atime || modified, modified, output_path) if modified
  else
    # Entry without a data stream: empty file
    FileUtils.touch(output_path)
  end
end
103
+
104
# Extract all files to directory
#
# Entry names come from the archive and are therefore untrusted. Every
# destination is resolved and checked to lie inside +output_dir+ before
# anything is written, so names such as "../../etc/passwd" cannot escape
# the target directory. NOTE: symlinks already present inside +output_dir+
# are not resolved by this check.
#
# @param output_dir [String] Destination directory
# @return [Array<Models::FileEntry>] The extracted entries
# @raise [RuntimeError] if an entry path escapes +output_dir+ or on
#   extraction error
def extract_all(output_dir)
  FileUtils.mkdir_p(output_dir)

  root = File.expand_path(output_dir)
  root_prefix = root.end_with?(File::SEPARATOR) ? root : root + File::SEPARATOR

  # Validate every destination first so a malicious archive is rejected
  # before any file has been written.
  targets = @entries.map do |entry|
    output_path = File.join(output_dir, entry.name)
    resolved = File.expand_path(output_path)
    unless resolved == root || resolved.start_with?(root_prefix)
      raise "Entry path escapes output directory: #{entry.name}"
    end

    [entry.name, output_path]
  end

  targets.each { |name, output_path| extract_entry(name, output_path) }
  @entries
end
116
+
117
# Report whether a start header has been read and considers itself valid.
#
# @return [Boolean] true if valid; false when no header has been read yet
def valid?
  @header.nil? ? false : @header.valid?
end
123
+
124
# Close every open volume handle and forget them, so the reader holds no
# file descriptors afterwards.
def close
  @volume_handles.each { |handle| handle.close }
  @volume_handles.clear
end
129
+
130
+ private
131
+
132
# Populate @volumes with the paths of every volume in the set, choosing the
# scan strategy from the naming pattern of the base path. A path that
# matches no split pattern is treated as a single-volume archive.
def detect_volumes
  @volumes = []

  pattern = detect_naming_pattern(@base_path)
  if pattern == :numeric
    detect_numeric_volumes
  elsif pattern == :alpha
    detect_alpha_volumes
  else
    # Single volume
    @volumes = [@base_path]
  end
end
149
+
150
# Detect naming pattern from base path
#
# The suffix is matched against the very end of the string (\z rather than
# $), so a path containing an embedded newline cannot be misclassified by
# a suffix that merely ends one of its lines.
#
# @param path [String] Base path
# @return [Symbol] :numeric (".001"), :alpha (".aa"), or :single
def detect_naming_pattern(path)
  if /\.\d{3}\z/.match?(path)
    :numeric
  elsif /\.[a-z]{2,}\z/.match?(path)
    :alpha
  else
    :single
  end
end
163
+
164
# Collect consecutively numbered volumes (.001, .002, ...) into @volumes,
# stopping at the first number whose file does not exist.
#
# @raise [RuntimeError] if not even the first volume exists
def detect_numeric_volumes
  stem = @base_path.sub(/\.\d{3}$/, "")

  (1..).each do |number|
    candidate = format("%s.%03d", stem, number)
    break unless File.exist?(candidate)

    @volumes << candidate
  end

  raise "No volumes found for #{@base_path}" if @volumes.empty?
end
179
+
180
# Collect alphabetically named volumes (.aa, .ab, ...) into @volumes,
# stopping at the first name whose file does not exist. File names are
# produced by SplitOptions configured for the alpha naming pattern.
#
# @raise [RuntimeError] if not even the first volume exists
def detect_alpha_volumes
  stem = @base_path.sub(/\.[a-z]{2,}$/, "")
  namer = Omnizip::Models::SplitOptions.new
  namer.naming_pattern = Omnizip::Models::SplitOptions::NAMING_ALPHA

  (1..).each do |number|
    candidate = namer.volume_filename(stem, number)
    break unless File.exist?(candidate)

    @volumes << candidate
  end

  raise "No volumes found for #{@base_path}" if @volumes.empty?
end
197
+
198
# Open all volume files in binary read mode.
#
# Handles are collected one by one; if a volume cannot be opened, the
# handles opened before it are closed again so no file descriptor leaks.
#
# @return [Array<File>] The opened handles, in volume order
# @raise [SystemCallError] if a volume cannot be opened
def open_volumes
  @volume_handles = []
  @volumes.each { |path| @volume_handles << File.open(path, "rb") }
  @volume_handles
rescue StandardError
  # Do not leak the handles that were opened before the failure
  @volume_handles.each(&:close)
  @volume_handles.clear
  raise
end
202
+
203
# Parse the .7z structure spread over the opened volumes.
#
# Reads the start header from the first volume, fetches the next-header
# block (which may live in a later volume), decodes it when it is an
# encoded header, and fills @stream_info and @entries.
#
# An EOFError while doing so is taken to mean that volumes are missing; the
# reader then warns and opens in degraded mode with no entries.
def parse_archive
  # Start header always lives at the beginning of the first volume
  @header = Header.read(@volume_handles.first)

  header_start = @header.start_pos_after_header + @header.next_header_offset
  raw_header = read_from_volumes(header_start, @header.next_header_size)

  # ENCODED_HEADER (0x17) can mean compressed or encrypted. Split archives
  # typically don't use encryption, so compression is assumed for now.
  # If encryption support is needed, add encrypted header detection here.
  if raw_header.getbyte(0) == PropertyId::ENCODED_HEADER
    raw_header = decompress_encoded_header(raw_header)
  end

  @stream_info, @entries = parse_metadata(Parser.new(raw_header))

  # Attach folder/stream indices and sizes to the parsed entries
  map_entries_to_streams
rescue EOFError => e
  # Header data incomplete - likely volumes were deleted after creation.
  # Degrade gracefully instead of refusing to open.
  warn "WARNING: Archive header incomplete - missing volumes detected. Opening in degraded mode."
  warn " Error: #{e.message}"
  @stream_info = Models::StreamInfo.new
  @entries = []
end
239
+
240
# Read data from volumes at global offset
#
# The volumes are treated as one contiguous byte stream. The global start
# offset of each volume is tracked with a running total, so every volume's
# size is looked up once per call.
#
# @param global_offset [Integer] Offset across all volumes
# @param size [Integer] Number of bytes to read
# @return [String] Read data (binary); shorter than +size+ when the
#   volumes end before the request is satisfied
def read_from_volumes(global_offset, size)
  data = String.new(encoding: "BINARY")
  remaining = size
  current_offset = global_offset
  volume_start = 0 # global offset at which the current volume begins

  @volume_handles.each_with_index do |handle, i|
    volume_size = File.size(@volumes[i])
    volume_end = volume_start + volume_size
    local_offset = current_offset - volume_start
    volume_start = volume_end

    # Volume lies entirely before the read position
    next if current_offset >= volume_end

    # Read position is before this volume (negative offset, or an earlier
    # volume returned fewer bytes than its size): nothing contiguous left
    break if local_offset.negative?

    to_read = [volume_size - local_offset, remaining].min
    handle.seek(local_offset)
    chunk = handle.read(to_read)

    # No data available - stop reading
    break unless chunk

    data << chunk
    remaining -= chunk.bytesize
    current_offset += chunk.bytesize

    break if remaining.zero?
  end

  data
end
284
+
285
# Add up the on-disk sizes of volumes 0 through +index+ (inclusive).
#
# @param index [Integer] Volume index
# @return [Integer] Cumulative size in bytes
def cumulative_size(index)
  @volumes[0..index].inject(0) { |total, path| total + File.size(path) }
end
292
+
293
# Walk the decoded header and collect stream information and file entries.
#
# The data must start with the HEADER marker. Sections are then read until
# K_END or end of data; unknown sections are skipped.
#
# @param parser [Parser] Parser instance
# @return [Array<StreamInfo, Array<Models::FileEntry>>] Parsed data
# @raise [RuntimeError] if the data does not start with the HEADER marker
def parse_metadata(parser)
  stream_info = Models::StreamInfo.new
  entries = []

  marker = parser.read_byte
  unless marker == PropertyId::HEADER
    raise "Expected Header, got 0x#{marker.to_s(16)}"
  end

  until parser.eof?
    section = parser.read_byte
    break if section == PropertyId::K_END

    if section == PropertyId::MAIN_STREAMS_INFO
      parse_streams_info(parser, stream_info)
    elsif section == PropertyId::FILES_INFO
      entries = parser.read_files_info
    elsif !parser.eof? && parser.peek_byte != PropertyId::K_END
      # Unknown property: skip its payload
      parser.skip_data
    end
  end

  # Consume a trailing K_END if one is still pending
  parser.read_byte if !parser.eof? && parser.peek_byte == PropertyId::K_END

  [stream_info, entries]
end
329
+
330
# Assign folder index, file index and unpacked size to every entry that has
# a data stream. Streams are numbered in entry order; a stream belongs to
# the first folder whose cumulative stream count exceeds its number
# (folder 0 when none does). Entries without a stream are left untouched.
def map_entries_to_streams
  return if @stream_info.nil?

  stream_no = 0
  @entries.each_with_index do |entry, position|
    next unless entry.has_stream?

    # Running upper bound of stream numbers covered by folders seen so far
    boundary = 0
    owner = @stream_info.num_unpack_streams_in_folders.index do |count|
      stream_no < (boundary += count)
    end

    entry.folder_index = owner || 0
    entry.file_index = position
    known_size = @stream_info.unpack_sizes[stream_no]
    entry.size = known_size if known_size
    stream_no += 1
  end
end
355
+
356
# Decode a header that was stored as an encoded (compressed) stream.
#
# The bytes after the ENCODED_HEADER marker describe where the packed header
# lives and how it was encoded; that stream is then read across the volumes
# and decompressed.
#
# @param encoded_data [String] Encoded header bytes
# @return [String] Decompressed header data
# @raise [RuntimeError] if the description starts with an unexpected property
def decompress_encoded_header(encoded_data)
  # Skip ENCODED_HEADER marker
  parser = Parser.new(encoded_data[1..])
  stream_info = Models::StreamInfo.new

  # Either a MAIN_STREAMS_INFO section or bare PackInfo/UnpackInfo
  type = parser.read_byte
  case type
  when PropertyId::MAIN_STREAMS_INFO
    parse_streams_info(parser, stream_info)
  when PropertyId::PACK_INFO
    # Direct PackInfo without MAIN_STREAMS_INFO wrapper
    parser.read_pack_info(stream_info)
    parser.read_unpack_info(stream_info) if parser.read_byte == PropertyId::UNPACK_INFO
  else
    raise "Unexpected property in encoded header: 0x#{type.to_s(16)}"
  end

  # The encoded header uses the first folder and first pack stream
  pack_pos = @header.start_pos_after_header + stream_info.pack_pos
  folder = stream_info.folders[0]
  pack_size = stream_info.pack_sizes[0]
  unpack_size = folder.uncompressed_size

  # Packed header bytes may span volumes, so read through the wrapper
  io = MultiVolumeIO.new(@volume_handles, @volumes)
  io.seek(pack_pos)

  StreamDecompressor.new(io, folder, pack_pos, pack_size, @header)
                    .decompress(unpack_size)
end
399
+
400
# Read the sub-sections of a streams-info block into +stream_info+ until
# K_END or end of data. Unknown sub-sections are skipped.
#
# @param parser [Parser] Parser instance
# @param stream_info [Models::StreamInfo] Stream info to populate
def parse_streams_info(parser, stream_info)
  until parser.eof?
    tag = parser.read_byte
    break if tag == PropertyId::K_END

    if tag == PropertyId::PACK_INFO
      parser.read_pack_info(stream_info)
    elsif tag == PropertyId::UNPACK_INFO
      parser.read_unpack_info(stream_info)
    elsif tag == PropertyId::SUBSTREAMS_INFO
      parser.read_substreams_info(stream_info)
    elsif !parser.eof? && parser.peek_byte != PropertyId::K_END
      # Unknown property within streams_info - skip it
      parser.skip_data
    end
  end

  # Consume final K_END for MAIN_STREAMS_INFO section if present
  parser.read_byte if !parser.eof? && parser.peek_byte == PropertyId::K_END
end
426
+
427
# Decompress the bytes of one entry from the volume set.
#
# The packed position of the entry's folder is the start of the pack area
# plus the sizes of all pack streams of the preceding folders. Only the
# first pack stream of the folder is used. When the folder holds several
# files (solid archive) the whole folder is decompressed and the entry's
# slice is cut out and CRC-checked; otherwise the stream decompressor
# decompresses and verifies the single file directly.
#
# @param entry [Models::FileEntry] Entry to extract
# @return [String] Extracted data ("" when the entry has no stream, no
#   stream info is loaded, or its folder is unknown)
# @raise [RuntimeError] on CRC mismatch in a solid folder; any other error
#   is logged with warn and re-raised
def extract_entry_data(entry)
  return "" unless entry.has_stream? && @stream_info

  folder = @stream_info.folders[entry.folder_index]
  return "" unless folder

  # Start from where pack data begins (after header and any offset)
  pack_pos = @header.start_pos_after_header + @stream_info.pack_pos
  pack_sizes = @stream_info.pack_sizes

  # Skip over the pack streams owned by the preceding folders
  pack_idx = 0
  entry.folder_index.times do |earlier|
    stream_count = @stream_info.folders[earlier].pack_stream_indices.size
    pack_pos += (pack_idx...(pack_idx + stream_count)).sum { |k| pack_sizes[k] || 0 }
    pack_idx += stream_count
  end

  # Use first pack stream only
  pack_size = pack_sizes[pack_idx] || 0

  io_wrapper = MultiVolumeIO.new(@volume_handles, @volumes)
  counts = @stream_info.num_unpack_streams_in_folders
  files_in_folder = counts[entry.folder_index] || 1

  if files_in_folder <= 1
    # Non-solid: each file has its own compressed stream
    return StreamDecompressor.new(io_wrapper, folder, pack_pos, pack_size, @header)
                             .decompress_and_verify(entry.size, entry.crc)
  end

  # Solid: find the first unpack stream of this folder, then total the
  # sizes of all streams that belong to it
  first_stream = 0
  counts.each_with_index do |count, fi|
    break if fi == entry.folder_index

    first_stream += count
  end
  total_unpack_size = Array.new(files_in_folder) do |k|
    @stream_info.unpack_sizes[first_stream + k] || 0
  end.sum

  full_data = StreamDecompressor.new(io_wrapper, folder, pack_pos, pack_size, @header)
                                .decompress(total_unpack_size)

  # Offset of this file = sizes of the earlier stream entries in its folder
  file_offset = @entries
                .take_while { |other| other.file_index != entry.file_index }
                .select { |other| other.has_stream? && other.folder_index == entry.folder_index }
                .sum(&:size)

  data = full_data[file_offset, entry.size]

  # Verify CRC if available
  if entry.crc
    checksum = Omnizip::Checksums::Crc32.new
    checksum.update(data)
    actual = checksum.value
    unless actual == entry.crc
      raise "CRC mismatch for #{entry.name}: expected 0x#{entry.crc.to_s(16)}, got 0x#{actual.to_s(16)}"
    end
  end

  data
rescue StandardError => e
  warn "Extraction failed for #{entry.name}: #{e.message}"
  raise
end
517
+
518
# Multi-volume IO wrapper
# Presents the volumes of a split archive as one contiguous, read-only,
# seekable byte stream.
#
# Reads are served on demand from the underlying volume handles, so only the
# requested bytes are held in memory. The wrapper repositions a handle
# before every read and therefore does not depend on (but does change) the
# handles' own file positions.
class MultiVolumeIO
  # @param handles [Array<IO>] Open, seekable handles, one per volume, in
  #   volume order
  # @param paths [Array<String>] Volume paths matching +handles+; used to
  #   determine the volume sizes
  def initialize(handles, paths)
    @handles = handles
    @paths = paths
    @position = 0
    @volume_sizes = nil
  end

  # Seek to position across volumes
  #
  # @param pos [Integer] Position to seek to
  # @param whence [Integer] Seek mode (IO::SEEK_SET, SEEK_CUR or SEEK_END)
  # @return [Integer] The new absolute position
  # @raise [ArgumentError] if +whence+ is not a known seek mode
  # @raise [Errno::EINVAL] if the resulting position would be negative
  def seek(pos, whence = ::IO::SEEK_SET)
    target =
      case whence
      when ::IO::SEEK_SET then pos
      when ::IO::SEEK_CUR then @position + pos
      when ::IO::SEEK_END then total_size + pos
      else raise ArgumentError, "unknown whence: #{whence.inspect}"
      end
    raise Errno::EINVAL, "negative seek position" if target.negative?

    @position = target
  end

  # Read from current position
  #
  # @param size [Integer, nil] Number of bytes to read; nil reads up to the
  #   end of the last volume
  # @return [String, nil] Read data (binary), or nil if at EOF
  # @raise [ArgumentError] if +size+ is negative
  def read(size = nil)
    raise ArgumentError, "negative length #{size} given" if size&.negative?

    # Return nil if at EOF (matches IO behavior for sized reads)
    available = total_size - @position
    return nil unless available.positive?

    remaining = size.nil? ? available : [size, available].min
    data = String.new(encoding: "BINARY")
    volume_start = 0 # global offset at which the current volume begins

    @handles.zip(volume_sizes) do |handle, volume_size|
      break if volume_size.nil? || !remaining.positive?

      volume_end = volume_start + volume_size
      if @position < volume_end
        wanted = [volume_end - @position, remaining].min
        handle.seek(@position - volume_start)
        chunk = handle.read(wanted)
        break if chunk.nil?

        data << chunk
        @position += chunk.bytesize
        remaining -= chunk.bytesize

        # A short read means the volume is smaller than reported; the
        # stream is no longer contiguous, so stop here
        break if chunk.bytesize < wanted
      end
      volume_start = volume_end
    end

    data
  end

  # Get current position
  #
  # @return [Integer] Current position
  def pos
    @position
  end

  # Get total size across all volumes
  #
  # @return [Integer] Total size
  def total_size
    volume_sizes.sum
  end

  private

  # Sizes of the volumes in order, looked up once and then reused.
  #
  # @return [Array<Integer>] Size in bytes of each volume
  def volume_sizes
    @volume_sizes ||= @paths.map { |path| File.size(path) }
  end
end
629
+ end
630
+ end
631
+ end
632
+ end