omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,347 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fractor"
4
+ require "fileutils"
5
+
6
+ module Omnizip
7
+ module Parallel
8
+ # Parallel compression coordinator using Fractor
9
+ #
10
+ # Manages parallel compression of files in a directory.
11
+ # Distributes compression work across multiple workers and
12
+ # writes results to archive in a thread-safe manner.
13
+ #
14
+ # @example Compress directory in parallel
15
+ # compressor = Omnizip::Parallel::ParallelCompressor.new(threads: 4)
16
+ # compressor.compress('files/', 'backup.zip')
17
+ #
18
+ # @example With options
19
+ # options = Omnizip::Models::ParallelOptions.new
20
+ # options.threads = 8
21
+ # compressor = Omnizip::Parallel::ParallelCompressor.new(options)
22
+ # compressor.compress('files/', 'backup.zip', compression: :lzma2)
23
+ class ParallelCompressor
24
# Unit of work handed to a CompressionWorker.
#
# Wraps the four job parameters in the input hash passed to Fractor::Work
# and exposes each of them through a reader.
class CompressionWork < Fractor::Work
  # @param file_path [String] path of the file to read
  # @param archive_path [String] name the file gets inside the archive
  # @param compression [Symbol] compression method
  # @param level [Integer] compression level
  def initialize(file_path:, archive_path:, compression: :deflate, level: 6)
    payload = {
      file_path: file_path,
      archive_path: archive_path,
      compression: compression,
      level: level,
    }
    super(payload)
  end

  # @return [String] path of the file to read
  def file_path
    input[:file_path]
  end

  # @return [String] name the file gets inside the archive
  def archive_path
    input[:archive_path]
  end

  # @return [Symbol] compression method
  def compression
    input[:compression]
  end

  # @return [Integer] compression level
  def level
    input[:level]
  end
end
52
+
53
+ # Fractor Worker class for compression
54
+ class CompressionWorker < Fractor::Worker
55
# Read, compress and checksum the file described by +work+.
#
# Any StandardError raised along the way is captured and returned as a
# failed WorkResult instead of propagating.
#
# @param work [CompressionWork] job to run
# @return [Fractor::WorkResult] result hash on success, error otherwise
def process(work)
  source = work.file_path
  target_name = work.archive_path
  algorithm = work.compression
  strength = work.level

  raw = ::File.binread(source)
  file_stat = ::File.stat(source)

  packed = compress_data(raw, algorithm, strength)

  # CRC32 is computed over the uncompressed bytes.
  checksum = Omnizip::Checksums::Crc32.new
  checksum.update(raw)

  payload = {
    archive_path: target_name,
    file_path: source,
    compressed_data: packed,
    uncompressed_size: raw.bytesize,
    compressed_size: packed.bytesize,
    crc32: checksum.finalize,
    stat: file_stat,
    compression: algorithm,
  }

  Fractor::WorkResult.new(result: payload, work: work)
rescue StandardError => e
  Fractor::WorkResult.new(error: e, work: work)
end
93
+
94
+ private
95
+
96
# Compress a byte string with the requested method.
#
# @param data [String] raw bytes to compress
# @param method [Symbol] :store, :deflate, :bzip2, :lzma, :lzma2 or :zstandard
# @param level [Integer] compression level handed to the codec
# @return [String] compressed bytes (+data+ itself for :store)
# @raise [Omnizip::UnsupportedFormatError] for any other method
def compress_data(data, method, level)
  case method
  when :store
    data
  when :deflate
    require "zlib"
    # Negative window bits produce a raw deflate stream (no zlib header).
    deflater = Zlib::Deflate.new(level, -Zlib::MAX_WBITS)
    begin
      deflater.deflate(data, Zlib::FINISH)
    ensure
      # Release the native zlib stream right away instead of waiting for GC.
      deflater.close
    end
  when :bzip2, :lzma, :lzma2, :zstandard
    Omnizip::AlgorithmRegistry.get(method).compress(data, level: level)
  else
    raise Omnizip::UnsupportedFormatError,
          "Unsupported compression: #{method}"
  end
end
117
+ end
118
+
119
+ # @return [Omnizip::Models::ParallelOptions] parallel options
120
+ attr_reader :options
121
+
122
+ # @return [Hash] compression statistics
123
+ attr_reader :stats
124
+
125
# Build a compressor from an options object, a hash of option values,
# or defaults.
#
# A ParallelOptions argument is duplicated. Hash keys are applied through
# the matching writer; keys without a writer are ignored.
#
# @param options [Omnizip::Models::ParallelOptions, Hash] parallel options
# @param threads [Integer] number of threads (overrides options)
def initialize(options = nil, threads: nil)
  @options =
    if options.is_a?(Omnizip::Models::ParallelOptions)
      options.dup
    elsif options.is_a?(Hash)
      built = Omnizip::Models::ParallelOptions.new
      options.each do |key, value|
        setter = :"#{key}="
        built.send(setter, value) if built.respond_to?(setter)
      end
      built
    else
      Omnizip::Models::ParallelOptions.new
    end

  @options.threads = threads if threads
  @options.validate!

  @stats = {
    files_processed: 0,
    bytes_processed: 0,
    bytes_compressed: 0,
    start_time: nil,
    end_time: nil,
  }
end
154
+
155
# Compress directory to archive in parallel
#
# Scans +dir+, compresses every regular file on the worker pool and then
# writes the archive in a single sequential pass.
#
# @param dir [String] directory path
# @param output [String] output archive path
# @param options [Hash] compression options
# @option options [Symbol] :compression compression method (default :deflate)
# @option options [Integer] :level compression level (default 6)
# @option options [Boolean] :recursive include subdirectories (default true)
# @option options [Proc] :progress accepted, but not invoked by this method
# @return [String] path to created archive
# @raise [Errno::ENOENT] if +dir+ does not exist
# @raise [ArgumentError] if +dir+ is not a directory
# @raise [Omnizip::CompressionError] if any worker reported a failure
def compress(dir, output, **options)
  raise Errno::ENOENT, "Directory not found: #{dir}" unless ::File.exist?(dir)
  raise ArgumentError, "Not a directory: #{dir}" unless ::File.directory?(dir)

  compression = options[:compression] || :deflate
  level = options[:level] || 6
  recursive = options.fetch(:recursive, true)

  # Counters are per run; without the reset a second call would mix byte
  # totals of earlier runs with the duration of the latest one.
  @stats[:files_processed] = 0
  @stats[:bytes_processed] = 0
  @stats[:bytes_compressed] = 0
  @stats[:end_time] = nil
  @stats[:start_time] = Time.now

  files = scan_directory(dir, recursive: recursive)

  job_queue = JobQueue.new(max_size: @options.queue_size)

  # The scheduler instance is not used afterwards; the call is kept in
  # case its constructor checks the configured strategy.
  # NOTE(review): confirm, then either use the scheduler or drop this.
  JobScheduler.new(strategy: @options.strategy)

  # "dir" with exactly one trailing separator, so stripping the prefix
  # also works when the caller passed "files/".
  dir_prefix = ::File.join(dir, "")

  files.each do |file_path|
    job_queue.push_with_size(
      file: file_path,
      size: ::File.size(file_path),
      data: {
        archive_path: file_path.delete_prefix(dir_prefix),
        compression: compression,
        level: level,
      },
    )
  end

  # Drain the queue into Fractor work items.
  work_items = []
  until job_queue.empty?
    job = job_queue.pop(timeout: 0.1)
    break unless job

    work_items << CompressionWork.new(
      file_path: job.file,
      archive_path: job.data[:archive_path],
      compression: job.data[:compression],
      level: job.data[:level],
    )
  end

  pool = WorkerPool.new(
    worker_class: CompressionWorker,
    num_workers: @options.threads,
    continuous: false,
  )

  pool.start
  begin
    pool.submit_batch(work_items)
    pool.run

    results = pool.successful_results
    errors = pool.failed_results

    unless errors.empty?
      error_msgs = errors.map do |e|
        "#{e.work&.file_path}: #{e.error}"
      end.join("\n")
      raise Omnizip::CompressionError, "Compression errors:\n#{error_msgs}"
    end

    # Archive is written by this thread only, after all workers finished.
    write_archive(output, results, compression: compression)
  ensure
    # Always release the workers, including when a worker failed or the
    # archive could not be written.
    pool.shutdown
  end

  @stats[:end_time] = Time.now
  @stats[:files_processed] = results.size

  output
end
252
+
253
# Snapshot of the counters plus derived figures.
#
# Duration is 0 until both start and end time have been recorded.
#
# @return [Hash] stats merged with :duration, :compression_ratio and
#   :throughput_mbps
def statistics
  started = @stats[:start_time]
  finished = @stats[:end_time]
  duration = started && finished ? finished - started : 0

  derived = {
    duration: duration,
    compression_ratio: calculate_compression_ratio,
    throughput_mbps: calculate_throughput(duration),
  }

  @stats.merge(derived)
end
269
+
270
+ private
271
+
272
# Scan directory for regular files
#
# The directory is passed to Dir.glob as +base:+ rather than spliced into
# the pattern, so names containing glob metacharacters ("[", "{", "*", ...)
# are taken literally. Dotfiles are not matched, as with a plain "*" glob.
#
# @param dir [String] directory path
# @param recursive [Boolean] scan recursively
# @return [Array<String>] sorted file paths, each prefixed with +dir+
def scan_directory(dir, recursive: true)
  pattern = recursive ? ::File.join("**", "*") : "*"

  Dir.glob(pattern, base: dir)
     .map { |relative| ::File.join(dir, relative) }
     .select { |path| ::File.file?(path) }
     .sort
end
292
+
293
# Assemble the ZIP file from the workers' pre-compressed payloads.
#
# For every result an entry is created through the writer's private
# create_entry with empty data, its size, CRC and data fields are then
# replaced by the worker's values, and the entry is appended to the
# writer's @entries. Byte counters in @stats are updated along the way.
#
# @param output [String] output path
# @param results [Array] compression results
# @param compression [Symbol] compression method
def write_archive(output, results, compression:)
  zip_writer = Omnizip::Formats::Zip::Writer.new(output)
  overridden_fields = %i[compressed_size uncompressed_size crc32 compressed_data]

  results.each do |work_result|
    payload = work_result.result
    next unless payload

    entry = zip_writer.send(
      :create_entry,
      filename: payload[:archive_path],
      uncompressed_data: "",
      stat: payload[:stat],
    )

    # Swap in the values computed by the worker.
    overridden_fields.each { |field| entry[field] = payload[field] }

    zip_writer.instance_variable_get(:@entries) << entry

    @stats[:bytes_processed] += payload[:uncompressed_size]
    @stats[:bytes_compressed] += payload[:compressed_size]
  end

  zip_writer.send(:write_with_precompressed_data, compression)
end
326
+
327
# Space saved by compression, as a percentage of the input size.
#
# @return [Float] compression ratio percentage (0.0 when nothing was
#   processed)
def calculate_compression_ratio
  original_bytes = @stats[:bytes_processed]
  return 0.0 if original_bytes.zero?

  packed_fraction = @stats[:bytes_compressed].to_f / original_bytes
  (1.0 - packed_fraction) * 100.0
end
335
+
336
# Calculate throughput in MB/s
#
# Durations come from wall-clock Time.now differences, which can be zero
# or negative if the system clock is adjusted; both yield 0.0 instead of
# a division error or a negative rate.
#
# @param duration [Numeric] duration in seconds
# @return [Float] throughput in MB/s
def calculate_throughput(duration)
  return 0.0 unless duration.positive?

  (@stats[:bytes_processed].to_f / (1024 * 1024)) / duration
end
345
+ end
346
+ end
347
+ end
@@ -0,0 +1,329 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fractor"
4
+ require "fileutils"
5
+
6
+ module Omnizip
7
+ module Parallel
8
+ # Parallel extraction coordinator using Fractor
9
+ #
10
+ # Manages parallel extraction of files from an archive.
11
+ # Distributes extraction work across multiple workers and
12
+ # writes files to disk in a thread-safe manner.
13
+ #
14
+ # @example Extract archive in parallel
15
+ # extractor = Omnizip::Parallel::ParallelExtractor.new(threads: 4)
16
+ # extractor.extract('backup.zip', 'output/')
17
+ #
18
+ # @example With options
19
+ # options = Omnizip::Models::ParallelOptions.new
20
+ # options.threads = 8
21
+ # extractor = Omnizip::Parallel::ParallelExtractor.new(options)
22
+ # extractor.extract('backup.zip', 'output/')
23
+ class ParallelExtractor
24
# Unit of work handed to an ExtractionWorker.
#
# Wraps the entry, the archive it lives in and the destination directory
# in the input hash passed to Fractor::Work.
class ExtractionWork < Fractor::Work
  # @param entry [Object] archive entry to extract
  # @param archive_path [String] path of the archive on disk
  # @param dest_dir [String] directory to extract into
  def initialize(entry:, archive_path:, dest_dir:)
    payload = {
      entry: entry,
      archive_path: archive_path,
      dest_dir: dest_dir,
    }
    super(payload)
  end

  # @return [Object] archive entry to extract
  def entry
    input[:entry]
  end

  # @return [String] path of the archive on disk
  def archive_path
    input[:archive_path]
  end

  # @return [String] directory to extract into
  def dest_dir
    input[:dest_dir]
  end
end
46
+
47
+ # Fractor Worker class for extraction
48
+ class ExtractionWorker < Fractor::Worker
49
# Decompress one archive entry and describe where it should be written.
#
# The entry name comes from the archive and is therefore untrusted: an
# entry whose path would resolve outside +dest_dir+ (e.g. "../../x") is
# rejected before any data is read. Symlinks already present below the
# destination are not resolved by this check.
#
# Any StandardError is captured and returned as a failed WorkResult.
#
# @param work [ExtractionWork] job to run
# @return [Fractor::WorkResult] result hash on success, error otherwise
def process(work)
  entry = work.entry
  archive_path = work.archive_path
  dest_dir = work.dest_dir

  dest_path = ::File.join(dest_dir, entry.name)

  root = ::File.expand_path(dest_dir)
  resolved = ::File.expand_path(dest_path)
  inside = resolved == root || resolved.start_with?(::File.join(root, ""))
  unless inside
    raise Omnizip::ExtractionError,
          "Entry escapes destination directory: #{entry.name}"
  end

  data = read_entry_data(archive_path, entry)

  Fractor::WorkResult.new(
    result: {
      entry_name: entry.name,
      dest_path: dest_path,
      data: data,
      directory: entry.directory?,
      unix_perms: entry.respond_to?(:unix_perms) ? entry.unix_perms : 0,
    },
    work: work,
  )
rescue StandardError => e
  Fractor::WorkResult.new(
    error: e,
    work: work,
  )
end
77
+
78
+ private
79
+
80
# Read and decompress the data of a single entry.
#
# Sizes, method and offset are taken from the reader's entry list; the
# local file header is parsed only to find where the data starts.
# A damaged archive (bad header signature, truncated header or data)
# raises instead of yielding an empty string, so corruption cannot be
# silently written out as an empty file.
#
# @param archive_path [String] path of the archive on disk
# @param entry [Object] entry to read (must respond to name, directory?)
# @return [String] uncompressed data ("" for directories)
# @raise [RuntimeError] if the entry is missing or its data is damaged
def read_entry_data(archive_path, entry)
  return "" if entry.directory?

  reader = Omnizip::Formats::Zip::Reader.new(archive_path)
  reader.read

  reader_entry = reader.entries.find { |e| e.filename == entry.name }
  raise "Entry not found in archive: #{entry.name}" unless reader_entry

  compressed_data = ::File.open(archive_path, "rb") do |io|
    io.seek(reader_entry.local_header_offset, ::IO::SEEK_SET)

    # Fixed part of the local file header is 30 bytes.
    fixed_header = io.read(30)
    unless fixed_header && fixed_header.bytesize == 30
      raise "Truncated local file header for entry: #{entry.name}"
    end
    # "PK\x03\x04" read as a little-endian 32-bit value.
    unless fixed_header.unpack1("V") == 0x04034b50
      raise "Invalid local file header for entry: #{entry.name}"
    end

    # File name length and extra field length are the last two 16-bit
    # fields of the fixed header (byte offset 26).
    filename_length, extra_length = fixed_header.byteslice(26, 4).unpack("vv")
    io.seek(filename_length + extra_length, ::IO::SEEK_CUR)

    expected = reader_entry.compressed_size
    payload = io.read(expected)
    unless payload && payload.bytesize == expected
      raise "Truncated data for entry: #{entry.name}"
    end

    payload
  end

  reader.send(:decompress_data,
              compressed_data,
              reader_entry.compression_method,
              reader_entry.uncompressed_size)
end
116
+ end
117
+
118
+ # @return [Omnizip::Models::ParallelOptions] parallel options
119
+ attr_reader :options
120
+
121
+ # @return [Hash] extraction statistics
122
+ attr_reader :stats
123
+
124
# Build an extractor from an options object, a hash of option values,
# or defaults.
#
# A ParallelOptions argument is duplicated. Hash keys are applied through
# the matching writer; keys without a writer are ignored.
#
# @param options [Omnizip::Models::ParallelOptions, Hash] parallel options
# @param threads [Integer] number of threads (overrides options)
def initialize(options = nil, threads: nil)
  @options =
    if options.is_a?(Omnizip::Models::ParallelOptions)
      options.dup
    elsif options.is_a?(Hash)
      built = Omnizip::Models::ParallelOptions.new
      options.each do |key, value|
        setter = :"#{key}="
        built.send(setter, value) if built.respond_to?(setter)
      end
      built
    else
      Omnizip::Models::ParallelOptions.new
    end

  @options.threads = threads if threads
  @options.validate!

  @stats = {
    files_extracted: 0,
    bytes_extracted: 0,
    start_time: nil,
    end_time: nil,
  }

  @write_mutex = Mutex.new
end
154
+
155
# Extract archive to directory in parallel
#
# Entries are decompressed on the worker pool; the files are then written
# to disk by the calling thread.
#
# @param archive [String] archive path
# @param dest [String] destination directory (created if missing)
# @param options [Hash] extraction options
# @option options [Boolean] :overwrite overwrite existing files
#   (default false)
# @option options [Proc] :progress accepted, but not invoked by this method
# @return [Array<String>] extracted file paths
# @raise [Errno::ENOENT] if +archive+ does not exist
# @raise [Omnizip::ExtractionError] if any worker reported a failure
def extract(archive, dest, **options)
  raise Errno::ENOENT, "Archive not found: #{archive}" unless ::File.exist?(archive)

  overwrite = options.fetch(:overwrite, false)

  # Counters are per run; without the reset a second call would mix byte
  # totals of earlier runs with the duration of the latest one.
  @stats[:files_extracted] = 0
  @stats[:bytes_extracted] = 0
  @stats[:end_time] = nil
  @stats[:start_time] = Time.now

  entries = read_archive_entries(archive)

  FileUtils.mkdir_p(dest)

  job_queue = JobQueue.new(max_size: @options.queue_size)

  entries.each do |entry|
    file_size = entry.respond_to?(:size) ? entry.size : 0

    job_queue.push_with_size(
      file: entry.name,
      size: file_size,
      data: { entry: entry },
    )
  end

  # Drain the queue into Fractor work items.
  work_items = []
  until job_queue.empty?
    job = job_queue.pop(timeout: 0.1)
    break unless job

    work_items << ExtractionWork.new(
      entry: job.data[:entry],
      archive_path: archive,
      dest_dir: dest,
    )
  end

  pool = WorkerPool.new(
    worker_class: ExtractionWorker,
    num_workers: @options.threads,
    continuous: false,
  )

  pool.start
  begin
    pool.submit_batch(work_items)
    pool.run

    results = pool.successful_results
    errors = pool.failed_results

    unless errors.empty?
      error_msgs = errors.map do |e|
        "#{e.work&.entry&.name}: #{e.error}"
      end.join("\n")
      raise Omnizip::ExtractionError, "Extraction errors:\n#{error_msgs}"
    end

    extracted_paths = write_extracted_files(results, overwrite: overwrite)
  ensure
    # Always release the workers, including when a worker failed or a
    # file could not be written.
    pool.shutdown
  end

  @stats[:end_time] = Time.now
  @stats[:files_extracted] = results.size

  extracted_paths
end
240
+
241
# Snapshot of the counters plus derived figures.
#
# Duration is 0 until both start and end time have been recorded.
#
# @return [Hash] stats merged with :duration and :throughput_mbps
def statistics
  started = @stats[:start_time]
  finished = @stats[:end_time]
  duration = started && finished ? finished - started : 0

  derived = {
    duration: duration,
    throughput_mbps: calculate_throughput(duration),
  }

  @stats.merge(derived)
end
256
+
257
+ private
258
+
259
# Collect every entry yielded by the archive into an array.
#
# @param archive_path [String] archive path
# @return [Array<Entry>] array of entries
def read_archive_entries(archive_path)
  [].tap do |collected|
    Omnizip::Zip::File.open(archive_path) do |zip|
      zip.each { |entry| collected << entry }
    end
  end
end
274
+
275
# Write extracted files to disk
#
# Directory entries are created with mkdir_p and may already exist: the
# parent of an earlier file is created on demand, and results arrive in
# no guaranteed order, so an existing directory is not a conflict. An
# existing regular file is only replaced when +overwrite+ is true.
#
# @param results [Array] extraction results (objects responding to
#   +result+ with :dest_path, :data, :directory and :unix_perms)
# @param overwrite [Boolean] overwrite existing files
# @return [Array<String>] extracted file paths
# @raise [RuntimeError] if a target exists and +overwrite+ is false
def write_extracted_files(results, overwrite: false)
  extracted_paths = []

  results.each do |work_result|
    result = work_result.result
    next unless result

    dest_path = result[:dest_path]

    @write_mutex.synchronize do
      if result[:directory]
        # Only a non-directory sitting at the path is a conflict.
        if ::File.exist?(dest_path) && !::File.directory?(dest_path) && !overwrite
          raise "File exists: #{dest_path}"
        end

        FileUtils.mkdir_p(dest_path)
      else
        raise "File exists: #{dest_path}" if ::File.exist?(dest_path) && !overwrite

        FileUtils.mkdir_p(::File.dirname(dest_path))
        ::File.binwrite(dest_path, result[:data])

        # nil (no permission info) is treated like 0: keep default mode.
        perms = result[:unix_perms].to_i
        ::File.chmod(perms & 0o777, dest_path) if perms.positive?

        @stats[:bytes_extracted] += result[:data].bytesize
      end

      extracted_paths << dest_path
    end
  end

  extracted_paths
end
317
+
318
# Calculate throughput in MB/s
#
# Durations come from wall-clock Time.now differences, which can be zero
# or negative if the system clock is adjusted; both yield 0.0 instead of
# a division error or a negative rate.
#
# @param duration [Numeric] duration in seconds
# @return [Float] throughput in MB/s
def calculate_throughput(duration)
  return 0.0 unless duration.positive?

  (@stats[:bytes_extracted].to_f / (1024 * 1024)) / duration
end
327
+ end
328
+ end
329
+ end