omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,660 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+ require_relative "models/folder"
5
+ require_relative "models/coder_info"
6
+ require_relative "models/stream_info"
7
+ require_relative "models/file_entry"
8
+
9
+ module Omnizip
10
+ module Formats
11
+ module SevenZip
12
+ # Binary data parser for .7z format
13
+ # Implements variable-length encoding and bit vector handling
14
+ class Parser
15
+ include Constants
16
+
17
+ attr_reader :data, :position
18
+
19
+ # Initialize parser with binary data
20
+ #
21
+ # @param data [String] Binary data to parse
22
def initialize(data)
  # Work on a binary (ASCII-8BIT) copy so offsets are always byte offsets,
  # and start the cursor at the first byte.
  @data, @position = data.b, 0
end
26
+
27
+ # Read a single byte
28
+ #
29
+ # @return [Integer] Byte value (0-255)
30
+ # @raise [EOFError] if no more data available
31
def read_byte
  # Capture the cursor first so the bounds check, the advance and the fetch
  # all refer to the same offset.
  offset = @position
  raise EOFError, "End of data" unless offset < @data.bytesize

  @position = offset + 1
  @data.getbyte(offset)
end
38
+
39
+ # Read variable-length integer (7-Zip VLI format)
40
+ #
41
+ # 7-Zip VLI encoding uses the first byte's high bits to determine
42
+ # the number of additional bytes:
43
+ # 0xxxxxxx : value = xxxxxxx (0-127)
44
+ # 10xxxxxx BYTE y[1] : value = (xxxxxx << 8) + y
45
+ # 110xxxxx BYTE y[2] : value = (xxxxx << 16) + y
46
+ # 1110xxxx BYTE y[3] : value = (xxxx << 24) + y
47
+ # ...up to 8 extra bytes (9 bytes total); the extra bytes y are stored little-endian
48
+ #
49
+ # @return [Integer] Decoded number
50
def read_number
  # 7z number encoding: the count of leading 1 bits in the first byte is the
  # number of extra bytes. The extra bytes form the LOW part of the value in
  # little-endian order; the remaining bits of the first byte are the HIGH
  # part. A first byte of 0xFF means eight extra bytes and no high part.
  #
  # Returns the decoded Integer; raises EOFError (from read_byte) when the
  # data ends in the middle of the number.
  first_byte = read_byte
  mask = 0x80
  value = 0

  8.times do |i|
    if first_byte.nobits?(mask)
      # First clear marker bit: whatever is below it belongs to the value,
      # placed above the extra bytes read so far.
      high_part = first_byte & (mask - 1)
      return value | (high_part << (8 * i))
    end

    value |= read_byte << (8 * i)
    mask >>= 1
  end

  # All eight marker bits were set: the value is the eight extra bytes.
  value
end
76
+
77
+ # Read 32-bit variable-length number
78
+ #
79
+ # @return [Integer] Number value (max 32-bit)
80
+ # @raise [RuntimeError] if value is 0x80000000 or greater
81
def read_number32
  lead = peek_byte

  if (lead & 0x80).zero?
    # Single-byte form: the peeked byte is the value, consume it.
    @position += 1
    lead
  else
    # Multi-byte form: decode fully, then reject anything at or above 2**31.
    read_number.tap do |decoded|
      raise "Unsupported 32-bit value" unless decoded < 0x80000000
    end
  end
end
93
+
94
+ # Read property ID
95
+ #
96
+ # @return [Integer] Property ID
97
def read_id
  # Property IDs use the same variable-length encoding as any other number.
  property_id = read_number
  property_id
end
100
+
101
+ # Read fixed-size unsigned 32-bit integer (little-endian)
102
+ #
103
+ # @return [Integer] 32-bit value
104
def read_uint32
  stop = @position + 4
  raise EOFError if stop > @data.bytesize

  # "V" = 32-bit unsigned, little-endian.
  word = @data.byteslice(@position, 4).unpack1("V")
  @position = stop
  word
end
111
+
112
+ # Read fixed-size unsigned 64-bit integer (little-endian)
113
+ #
114
+ # @return [Integer] 64-bit value
115
def read_uint64
  stop = @position + 8
  raise EOFError if stop > @data.bytesize

  # "Q<" = 64-bit unsigned, little-endian.
  quad = @data.byteslice(@position, 8).unpack1("Q<")
  @position = stop
  quad
end
122
+
123
+ # Read bit vector
124
+ # Format: 1 byte flag, then either all 1s or bit array
125
+ #
126
+ # @param num_items [Integer] Number of items in bit vector
127
+ # @return [Array<Integer>] Bit vector (1 = set, 0 = clear)
128
def read_bit_vector(num_items)
  # A non-zero flag byte means "every item is set"; no packed bits follow.
  # Items are reported as Integer 1/0, not true/false.
  flag = read_byte
  return Array.new(num_items, 1) unless flag.zero?

  # Otherwise the bits follow, packed eight per byte.
  byte_count = (num_items + 7) / 8
  raise EOFError if @position + byte_count > @data.bytesize

  packed = @data[@position, byte_count]
  @position += byte_count
  decode_bit_vector(packed, num_items)
end
144
+
145
+ # Read raw bytes
146
+ #
147
+ # @param count [Integer] Number of bytes to read
148
+ # @return [String] Binary string
149
def read_bytes(count)
  finish = @position + count
  raise EOFError if finish > @data.bytesize

  # Slice first, then move the cursor to just past the slice.
  chunk = @data[@position, count]
  @position = finish
  chunk
end
156
+
157
+ # Skip bytes
158
+ #
159
+ # @param count [Integer] Number of bytes to skip
160
def skip(count)
  # Pure cursor move: no bounds check is made here, so a later read is what
  # notices a position past the end.
  @position = @position + count
end
163
+
164
+ # Peek at next byte without advancing position
165
+ #
166
+ # @return [Integer] Next byte value
167
def peek_byte
  # Same bounds rule as a real read, but the cursor is left untouched.
  raise EOFError unless @position < @data.bytesize

  @data.getbyte(@position)
end
172
+
173
+ # Check if at end of data
174
+ #
175
+ # @return [Boolean] true if no more data
176
def eof?
  # True once the cursor is at or beyond the last byte.
  !(@position < @data.bytesize)
end
179
+
180
+ # Get remaining byte count
181
+ #
182
+ # @return [Integer] Bytes remaining
183
def remaining
  # Negative when the cursor has been moved past the end of the data.
  total = @data.bytesize
  total - @position
end
186
+
187
+ # Read pack info section
188
+ # Contains information about packed streams
189
+ #
190
+ # @param stream_info [StreamInfo] Stream info to populate
191
def read_pack_info(stream_info)
  # Populates stream_info.pack_pos, .pack_sizes and .pack_crcs from the
  # pack-info section. Raises RuntimeError when no SIZE property is found.
  stream_info.pack_pos = read_number
  num_pack_streams = read_number

  sizes_read = false

  # Properties are accepted in any order until K_END (or end of data).
  until eof? || peek_byte == PropertyId::K_END
    case read_byte
    when PropertyId::SIZE
      num_pack_streams.times { stream_info.pack_sizes << read_number }
      sizes_read = true
    when PropertyId::CRC
      defined_vec = read_bit_vector(num_pack_streams)
      num_pack_streams.times do |i|
        # The vector holds Integer 1/0 and 0 is truthy in Ruby, so compare
        # explicitly: a CRC is stored only for streams flagged as defined.
        stream_info.pack_crcs << (defined_vec[i] == 1 ? read_uint32 : nil)
      end
    else
      # Unknown property: its ID is already consumed, skip the sized payload.
      skip_data unless eof?
    end
  end

  raise "Missing SIZE property in pack info" unless sizes_read

  # Consume the terminating K_END when present.
  read_byte if !eof? && peek_byte == PropertyId::K_END
end
232
+
233
+ # Read folders section
234
+ # Contains compression method and coder information
235
+ #
236
+ # @param stream_info [StreamInfo] Stream info to populate
237
def read_folders(stream_info)
  folder_count = read_number

  # The byte after the count is the "external" flag; only inline folder
  # definitions (flag == 0) are handled.
  raise "External folders not supported" unless read_byte.zero?

  folder_count.times do
    parsed = Models::Folder.new
    read_folder(parsed)
    stream_info.folders << parsed
  end
end
256
+
257
+ # Read single folder definition
258
+ #
259
+ # @param folder [Models::Folder] Folder to populate
260
def read_folder(folder)
  coder_count = read_number
  raise "Too many coders" if coder_count > Constants::MAX_NUM_CODERS

  # Parse each coder and total the stream counts across all of them.
  total_in = 0
  total_out = 0
  coder_count.times do
    info = Models::CoderInfo.new
    read_coder(info)
    folder.coders << info
    total_in += info.num_in_streams
    total_out += info.num_out_streams
  end

  # One bind pair fewer than output streams, each stored as (in, out).
  bind_pair_count = total_out - 1
  bind_pair_count.times do
    in_index = read_number
    out_index = read_number
    folder.bind_pairs << [in_index, out_index]
  end

  pack_stream_count = total_in - bind_pair_count
  if pack_stream_count == 1
    # Not stored explicitly: it is the first input no bind pair refers to.
    free_input = (0...total_in).find do |candidate|
      folder.bind_pairs.none? { |pair| pair[0] == candidate }
    end
    folder.pack_stream_indices << free_input unless free_input.nil?
  else
    pack_stream_count.times { folder.pack_stream_indices << read_number }
  end
end
302
+
303
+ # Read coder definition
304
+ #
305
+ # @param coder [Models::CoderInfo] Coder to populate
306
def read_coder(coder)
  flags = read_byte

  # Low nibble: method ID length in bytes. 0x10: explicit stream counts
  # follow. 0x20: a properties blob follows.
  id_length = flags & 0x0F
  complex = (flags & 0x10) != 0
  with_props = (flags & 0x20) != 0

  # The method ID is a big-endian integer of id_length bytes.
  id_bytes = Array.new(id_length) { read_byte }
  coder.method_id = id_bytes.inject(0) { |acc, byte| (acc << 8) | byte }

  if complex
    coder.num_in_streams = read_number
    coder.num_out_streams = read_number
  else
    coder.num_in_streams = 1
    coder.num_out_streams = 1
  end

  return unless with_props

  coder.properties = read_bytes(read_number)
end
336
+
337
+ # Read unpack info section
338
+ # Contains information about unpacked streams
339
+ #
340
+ # @param stream_info [StreamInfo] Stream info to populate
341
def read_unpack_info(stream_info)
  # Reads folder definitions, per-folder unpack sizes and optional folder
  # CRCs into stream_info. Raises RuntimeError when FOLDER or
  # CODERS_UNPACK_SIZE is missing.
  folders_read = false
  unpack_sizes_read = false

  # Properties are accepted in any order until K_END (or end of data).
  until eof? || peek_byte == PropertyId::K_END
    case read_byte
    when PropertyId::FOLDER
      read_folders(stream_info)
      folders_read = true
    when PropertyId::CODERS_UNPACK_SIZE
      stream_info.folders.each do |folder|
        # A folder reporting zero output streams is still given one size.
        out_streams = folder.num_out_streams
        out_streams = 1 if out_streams.zero?
        out_streams.times { folder.unpack_sizes << read_number }
      end
      unpack_sizes_read = true
    when PropertyId::CRC
      defined_vec = read_bit_vector(stream_info.num_folders)
      stream_info.num_folders.times do |i|
        # The vector holds Integer 1/0 and 0 is truthy in Ruby, so compare
        # explicitly: only folders flagged as defined carry a stored CRC.
        stream_info.folders[i].unpack_crc = read_uint32 if defined_vec[i] == 1
      end
    else
      # Unknown property: its ID is already consumed, skip the sized payload.
      skip_data unless eof?
    end
  end

  raise "Missing FOLDER property in unpack info" unless folders_read
  raise "Missing CODERS_UNPACK_SIZE property in unpack info" unless unpack_sizes_read

  # Consume the terminating K_END when present.
  read_byte if !eof? && peek_byte == PropertyId::K_END
end
386
+
387
+ # Read substreams info section
388
+ # Maps files to compressed streams
389
+ #
390
+ # @param stream_info [StreamInfo] Stream info to populate
391
def read_substreams_info(stream_info)
  # Fills stream_info.num_unpack_streams_in_folders, .unpack_sizes and
  # .digests, i.e. how each folder's output is divided into file streams.

  # Streams per folder: explicit counts, or one stream per folder.
  if !eof? && peek_byte == PropertyId::NUM_UNPACK_STREAM
    read_byte
    stream_info.folders.each do
      stream_info.num_unpack_streams_in_folders << read_number
    end
  else
    stream_info.folders.size.times do
      stream_info.num_unpack_streams_in_folders << 1
    end
  end

  if !eof? && peek_byte == PropertyId::SIZE
    read_byte
    stream_info.folders.each_with_index do |folder, i|
      num_streams = stream_info.num_unpack_streams_in_folders[i]
      # A folder with no substreams contributes no sizes at all; adding a
      # "last" size for it would shift every later stream's size.
      next if num_streams.zero?

      # All but the last size are stored; the last is the remainder of the
      # folder's total output.
      explicit_sizes = Array.new(num_streams - 1) { read_number }
      stream_info.unpack_sizes.concat(explicit_sizes)
      stream_info.unpack_sizes << (folder.uncompressed_size - explicit_sizes.sum)
    end
  else
    # No SIZE property: only single-stream folders have a known size, taken
    # from the folder's own unpack sizes.
    stream_info.folders.each_with_index do |folder, i|
      next unless stream_info.num_unpack_streams_in_folders[i] == 1

      stream_info.unpack_sizes << (folder.unpack_sizes.empty? ? 0 : folder.unpack_sizes.sum)
    end
  end

  # NOTE(review): one digest slot is read per substream. The 7z format omits
  # digests for single-stream folders whose folder CRC is already defined;
  # confirm whether that case needs handling here.
  num_digests = stream_info.num_unpack_streams_in_folders.sum
  if !eof? && peek_byte == PropertyId::CRC
    read_byte
    defined_vec = read_bit_vector(num_digests)
    num_digests.times do |i|
      # The vector holds Integer 1/0 and 0 is truthy in Ruby, so compare
      # explicitly: undefined digests are recorded as nil without a read.
      stream_info.digests << (defined_vec[i] == 1 ? read_uint32 : nil)
    end
  end

  # Consume the terminating K_END when present.
  read_byte if !eof? && peek_byte == PropertyId::K_END
end
450
+
451
+ # Read files info section
452
+ # Contains file metadata (names, timestamps, attributes)
453
+ #
454
+ # @return [Array<Models::FileEntry>] Array of file entries
455
def read_files_info
  file_count = read_number
  entries = Array.new(file_count) { Models::FileEntry.new }

  # Hand each property to its reader until K_END or the end of the data.
  until eof? || peek_byte == PropertyId::K_END
    property = read_byte

    case property
    when PropertyId::NAME then read_names(entries)
    when PropertyId::EMPTY_STREAM then read_empty_stream(entries)
    when PropertyId::EMPTY_FILE then read_empty_file(entries)
    when PropertyId::ANTI then read_anti(entries)
    when PropertyId::CTIME then read_timestamps(entries, :ctime)
    when PropertyId::ATIME then read_timestamps(entries, :atime)
    when PropertyId::MTIME then read_timestamps(entries, :mtime)
    when PropertyId::WIN_ATTRIB then read_attributes(entries)
    when PropertyId::DUMMY then skip_data
    else skip_data
    end
  end

  # Consume the terminating K_END when present.
  read_byte if !eof? && peek_byte == PropertyId::K_END

  entries
end
491
+
492
+ # Read file names
493
+ #
494
+ # @param entries [Array<Models::FileEntry>] File entries
495
def read_names(entries)
  # The declared size covers the external flag plus all name bytes.
  declared_size = read_number
  origin = @position

  if read_byte.zero?
    # Inline names: UTF-16LE code units, each name ended by a 0x0000 unit.
    entries.each do |entry|
      units = []
      loop do
        pair = [read_byte, read_byte]
        break if pair == [0, 0]

        units.concat(pair)
      end
      entry.name = units.pack("C*").encode("UTF-8", "UTF-16LE")
    end
  end

  # Step over anything the declared size covers that was not consumed.
  leftover = declared_size - (@position - origin)
  skip(leftover) if leftover.positive?
end
520
+
521
+ # Read empty stream flags
522
+ #
523
+ # @param entries [Array<Models::FileEntry>] File entries
524
def read_empty_stream(entries)
  # Marks which entries have no data stream. Sets entry.has_stream and
  # entry.is_dir for every entry.
  #
  # In the 7z format this property is a bare bit vector, one bit per file,
  # with no leading "all defined" byte. The declared property size tells the
  # two layouts apart: when it equals the packed vector length the payload
  # is decoded directly; any other size falls back to the flag-prefixed
  # layout this parser accepted before.
  payload_size = read_number
  bits = if payload_size == (entries.size + 7) / 8
           decode_bit_vector(read_bytes(payload_size), entries.size)
         else
           read_bit_vector(entries.size)
         end

  entries.each_with_index do |entry, i|
    # Bit 1 = entry has no stream; such entries are also flagged is_dir here.
    no_stream = bits[i] == 1
    entry.has_stream = !no_stream
    entry.is_dir = no_stream
  end
end
534
+
535
+ # Read empty file flags
536
+ #
537
+ # @param entries [Array<Models::FileEntry>] File entries
538
def read_empty_file(entries)
  # Flags which stream-less entries are empty files. Only entries whose
  # has_stream is falsy take part, in order; sets entry.is_empty on each.
  #
  # In the 7z format this property is a bare bit vector, one bit per empty
  # stream, with no leading "all defined" byte. The declared property size
  # tells the two layouts apart: when it equals the packed vector length the
  # payload is decoded directly; any other size falls back to the
  # flag-prefixed layout this parser accepted before.
  payload_size = read_number
  empty_streams = entries.reject(&:has_stream)
  bits = if payload_size == (empty_streams.size + 7) / 8
           decode_bit_vector(read_bytes(payload_size), empty_streams.size)
         else
           read_bit_vector(empty_streams.size)
         end

  # Bits are Integer 1/0 (0 is truthy in Ruby), so compare explicitly.
  # NOTE(review): is_dir is not touched here; per the format an empty file
  # is not a directory -- confirm whether callers expect is_dir cleared.
  empty_streams.each_with_index do |entry, i|
    entry.is_empty = bits[i] == 1
  end
end
546
+
547
+ # Read anti flags
548
+ #
549
+ # @param entries [Array<Models::FileEntry>] File entries
550
def read_anti(entries)
  # Flags anti-items among the stream-less entries; sets entry.is_anti to
  # true or false on each of them.
  #
  # In the 7z format this property is a bare bit vector with one bit per
  # empty stream (every entry whose has_stream is falsy) and no leading
  # "all defined" byte. The declared property size tells the two layouts
  # apart: when it equals the packed vector length the payload is decoded
  # directly; any other size falls back to the flag-prefixed layout this
  # parser accepted before.
  payload_size = read_number
  empty_streams = entries.reject(&:has_stream)
  bits = if payload_size == (empty_streams.size + 7) / 8
           decode_bit_vector(read_bytes(payload_size), empty_streams.size)
         else
           read_bit_vector(empty_streams.size)
         end

  # Store a real boolean: the raw bit is Integer 1/0 and 0 is truthy in
  # Ruby, so storing it directly would mark every entry as an anti-item.
  empty_streams.each_with_index do |entry, i|
    entry.is_anti = bits[i] == 1
  end
end
558
+
559
+ # Read timestamps
560
+ #
561
+ # @param entries [Array<Models::FileEntry>] File entries
562
+ # @param attr [Symbol] Attribute name (:mtime, :atime, :ctime)
563
def read_timestamps(entries, attr)
  # Reads one timestamp property and assigns it through the entry's
  # "#{attr}=" writer (attr is :mtime, :atime or :ctime). Entries whose
  # "defined" bit is clear are left untouched and consume no data.
  skip_size
  defined_bits = read_bit_vector(entries.size)
  external = read_byte

  # Timestamps stored externally (non-zero flag) are not read.
  return unless external.zero?

  setter = :"#{attr}="
  entries.each_with_index do |entry, i|
    # The vector holds Integer 1/0 and 0 is truthy in Ruby, so compare
    # explicitly; otherwise undefined entries would each swallow 8 bytes.
    next unless defined_bits[i] == 1

    # Stored value is a Windows FILETIME (100 ns ticks since 1601-01-01).
    entry.send(setter, windows_time_to_unix(read_uint64))
  end
end
579
+
580
+ # Read file attributes
581
+ #
582
+ # @param entries [Array<Models::FileEntry>] File entries
583
def read_attributes(entries)
  # Reads the attributes property and assigns entry.attributes for each
  # entry whose "defined" bit is set; other entries are left untouched and
  # consume no data.
  skip_size
  defined_bits = read_bit_vector(entries.size)
  external = read_byte

  # Attributes stored externally (non-zero flag) are not read.
  return unless external.zero?

  entries.each_with_index do |entry, i|
    # The vector holds Integer 1/0 and 0 is truthy in Ruby, so compare
    # explicitly; otherwise undefined entries would each swallow 4 bytes.
    entry.attributes = read_uint32 if defined_bits[i] == 1
  end
end
594
+
595
+ # Expect specific property ID
596
+ #
597
+ # @param expected [Integer] Expected property ID
598
+ # @raise [RuntimeError] if property doesn't match
599
def expect_property(expected)
  # Always consumes one byte; the error reports both IDs in hexadecimal.
  actual = read_byte
  unless actual == expected
    raise "Expected property 0x#{expected.to_s(16)}, got 0x#{actual.to_s(16)}"
  end
end
606
+
607
+ # Skip size field
608
def skip_size
  # Decode the size field only to move past it; the decoded value is still
  # what this method returns.
  declared = read_number
  declared
end
611
+
612
+ # Skip property data
613
def skip_data
  # A skippable payload is a length-prefixed blob: decode the length, then
  # move the cursor forward by that many bytes.
  skip(read_number)
end
617
+
618
+ # Convert Windows FILETIME to Unix timestamp
619
+ #
620
+ # @param windows_time [Integer] Windows FILETIME
621
+ # @return [Time, nil] Ruby Time object, or nil if the conversion raises
622
def windows_time_to_unix(windows_time)
  # FILETIME counts 100 ns ticks from 1601-01-01; Unix time counts seconds
  # from 1970-01-01. The division drops the sub-second part.
  ticks_per_second = 10_000_000
  epoch_gap_seconds = 11_644_473_600

  Time.at((windows_time / ticks_per_second) - epoch_gap_seconds)
rescue StandardError
  # Any failure during conversion (e.g. a nil input) yields nil.
  nil
end
631
+
632
+ private
633
+
634
+ # Decode bit vector from packed bytes
635
+ #
636
+ # @param bits_data [String] Packed bit data
637
+ # @param num_items [Integer] Number of items
638
+ # @return [Array<Integer>] Decoded bits (1 = set, 0 = clear)
639
def decode_bit_vector(bits_data, num_items)
  # Bits are packed most-significant first: item 0 is bit 7 of byte 0,
  # item 8 is bit 7 of byte 1, and so on. Each item is Integer 1 or 0.
  num_items.times.map do |index|
    byte = bits_data.getbyte(index / 8)
    (byte >> (7 - (index % 8))) & 1
  end
end
657
+ end
658
+ end
659
+ end
660
+ end