omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fractor"
4
+
5
+ module Omnizip
6
+ module Parallel
7
+ # Worker pool wrapper for Fractor-based parallel processing
8
+ #
9
+ # Manages a pool of Fractor workers for parallel compression/extraction.
10
+ # Handles job distribution, result collection, and graceful shutdown.
11
+ #
12
+ # @example Create and use worker pool
13
+ # pool = Omnizip::Parallel::WorkerPool.new(
14
+ # worker_class: CompressionWorker,
15
+ # num_workers: 4
16
+ # )
17
+ # pool.start
18
+ # pool.submit(work_item)
19
+ # results = pool.results
20
+ # pool.shutdown
21
class WorkerPool
  # @return [Fractor::Supervisor, nil] underlying supervisor (nil until #start)
  attr_reader :supervisor

  # @return [Array] results collected from the supervisor so far
  attr_reader :results

  # @return [Array] errors collected so far (supervisor errors and
  #   failures of the background supervisor thread)
  attr_reader :errors

  # @return [Boolean] whether pool is running
  attr_reader :running

  # Initialize worker pool
  #
  # @param worker_class [Class] Fractor::Worker subclass
  # @param num_workers [Integer, nil] number of workers; defaults to the
  #   detected CPU count when nil
  # @param continuous [Boolean] continuous mode for long-running tasks
  def initialize(worker_class:, num_workers: nil, continuous: false)
    @worker_class = worker_class
    @num_workers = num_workers || detect_cpu_count
    @continuous = continuous
    @supervisor = nil
    @supervisor_thread = nil
    @results = []
    @errors = []
    @running = false
    @result_mutex = Mutex.new
    @work_queue = nil
    # How many aggregator entries have already been copied into
    # @results / @errors, so repeated collection never duplicates them.
    @collected_result_count = 0
    @collected_error_count = 0
  end

  # Start the worker pool. Does nothing when the pool is already running.
  #
  # @return [void]
  def start
    return if @running

    # Create Fractor supervisor with worker pool configuration
    @supervisor = Fractor::Supervisor.new(
      worker_pools: [
        {
          worker_class: @worker_class,
          num_workers: @num_workers,
        },
      ],
      continuous_mode: @continuous,
    )
    # A new supervisor brings a new aggregator, so collection restarts at 0.
    @collected_result_count = 0
    @collected_error_count = 0

    # For continuous mode, set up work queue
    if @continuous
      @work_queue = Fractor::WorkQueue.new
      @work_queue.register_with_supervisor(@supervisor)
    end

    @running = true

    if @continuous
      # Run the supervisor in a background thread; a failure there is
      # recorded as an error entry instead of being lost with the thread.
      @supervisor_thread = Thread.new do
        begin
          @supervisor.run
        rescue StandardError => e
          @result_mutex.synchronize do
            @errors << { error: e, message: "Supervisor error: #{e.message}" }
          end
        end
      end
    else
      # Batch mode: only start the workers here; #run drives the supervisor
      @supervisor.start_workers
    end
  end

  # Submit work item to the pool
  #
  # @param work [Fractor::Work] work item to process
  # @return [void]
  # @raise [RuntimeError] if the pool has not been started
  def submit(work)
    raise "Worker pool not started" unless @running

    if @continuous
      # In continuous mode, add to work queue
      @work_queue << work
    else
      # In batch mode, add to supervisor
      @supervisor.add_work_item(work)
    end
  end

  # Submit multiple work items
  #
  # @param works [Array<Fractor::Work>] array of work items
  # @return [void]
  def submit_batch(works)
    works.each { |work| submit(work) }
  end

  # Run the pool in batch mode and wait for completion
  #
  # @return [void]
  # @raise [RuntimeError] in continuous mode or if the pool is not started
  def run
    raise "Can only run in batch mode" if @continuous
    raise "Worker pool not started" unless @running

    @supervisor.run

    collect_results
  end

  # Shutdown the worker pool. Does nothing when the pool is not running.
  #
  # @param timeout [Numeric] seconds to wait for the supervisor thread
  #   (continuous mode only)
  # @return [void]
  def shutdown(timeout: 30)
    return unless @running

    if @continuous
      @supervisor.stop
      @supervisor_thread&.join(timeout)
    end

    # Pick up anything produced since the last collection
    collect_results

    @running = false
  end

  # Get successful results
  #
  # @return [Array] snapshot copy of the collected results
  def successful_results
    @result_mutex.synchronize { @results.dup }
  end

  # Get failed results
  #
  # @return [Array] snapshot copy of the collected errors
  def failed_results
    @result_mutex.synchronize { @errors.dup }
  end

  # Get pool statistics
  #
  # @return [Hash] statistics hash; empty before the pool was ever started
  def stats
    return {} unless @supervisor

    # Read both counters under the lock so they describe one consistent state
    result_count, error_count = @result_mutex.synchronize do
      [@results.size, @errors.size]
    end

    {
      workers: @num_workers,
      running: @running,
      continuous: @continuous,
      results: result_count,
      errors: error_count,
      total_processed: result_count + error_count,
    }
  end

  # Check if pool has completed all work
  #
  # Always false in continuous mode and before the pool was started.
  # In batch mode the pool counts as complete once the supervisor's
  # work queue is empty.
  # NOTE(review): items already taken off the queue but still being
  # processed are not visible here - confirm against Fractor if needed.
  #
  # @return [Boolean] true if complete
  def complete?
    return false if @continuous
    return false unless @supervisor
    return false unless @supervisor.results

    @supervisor.work_queue.empty?
  end

  private

  # Copy results and errors from the supervisor's aggregator that have
  # not been collected yet. Safe to call repeatedly (e.g. from #run and
  # then #shutdown): only entries beyond the last collected position are
  # appended. Assumes the aggregator's arrays are append-only.
  #
  # @return [void]
  def collect_results
    return unless @supervisor

    aggregator = @supervisor.results
    return unless aggregator

    @result_mutex.synchronize do
      fresh_results = aggregator.results.drop(@collected_result_count)
      fresh_errors = aggregator.errors.drop(@collected_error_count)

      @results.concat(fresh_results)
      @errors.concat(fresh_errors)

      @collected_result_count += fresh_results.size
      @collected_error_count += fresh_errors.size
    end
  end

  # Detect number of available CPU cores
  #
  # @return [Integer] number of CPUs, or 4 when detection fails
  def detect_cpu_count
    require "etc"
    Etc.nprocessors
  rescue LoadError, StandardError
    # LoadError is not a StandardError, so it must be listed explicitly
    4 # fallback
  end
end
222
+ end
223
+ end
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "parallel/job_queue"
4
+ require_relative "parallel/job_scheduler"
5
+ require_relative "parallel/worker_pool"
6
+ require_relative "parallel/parallel_compressor"
7
+ require_relative "parallel/parallel_extractor"
8
+
9
+ module Omnizip
10
+ # Parallel processing module for multi-threaded compression/extraction
11
+ #
12
+ # Leverages Fractor for parallel processing to utilize multi-core CPUs.
13
+ # Provides high-level APIs for parallel compression and extraction operations.
14
+ #
15
+ # @example Auto-detect CPU count and compress in parallel
16
+ # Omnizip::Parallel.compress_directory('files/', 'backup.zip')
17
+ #
18
+ # @example Custom thread count
19
+ # Omnizip::Parallel.compress_directory('files/', 'backup.zip', threads: 8)
20
+ #
21
+ # @example Parallel extraction
22
+ # Omnizip::Parallel.extract_archive('large.zip', 'output/', threads: 4)
23
+ #
24
+ # @example Configure globally
25
+ # Omnizip::Parallel.configure do |config|
26
+ # config.default_threads = 8
27
+ # config.queue_size = 100
28
+ # config.load_balancing = :dynamic
29
+ # end
30
+ module Parallel
31
class << self
  # Global configuration
  #
  # @return [Omnizip::Models::ParallelOptions, nil]
  attr_accessor :config

  # Configure parallel processing globally
  #
  # Creates the configuration object on first use, yields it to the
  # block (if one is given), validates it and returns it.
  #
  # @yield [config] Configuration block
  # @yieldparam config [Omnizip::Models::ParallelOptions] configuration object
  # @return [Omnizip::Models::ParallelOptions] the validated configuration
  #
  # @example
  #   Omnizip::Parallel.configure do |config|
  #     config.threads = 8
  #     config.queue_size = 100
  #     config.strategy = :dynamic
  #   end
  def configure
    @config ||= Omnizip::Models::ParallelOptions.new
    yield @config if block_given?
    @config.validate!
    @config
  end

  # Compress directory in parallel
  #
  # @param dir [String] directory path
  # @param output [String] output archive path
  # @param options [Hash] compression options
  # @option options [Integer] :threads number of threads
  #   (falls back to the global config's +threads+)
  # @option options [Symbol] :compression compression method
  # @option options [Integer] :level compression level
  # @option options [Boolean] :recursive include subdirectories
  # @option options [Proc] :progress progress callback
  # @option options [Boolean] :verbose print statistics when done
  # @return [String] path to created archive
  #
  # @example
  #   Omnizip::Parallel.compress_directory('files/', 'backup.zip')
  #   Omnizip::Parallel.compress_directory('files/', 'backup.zip', threads: 8)
  def compress_directory(dir, output, **options)
    # :threads is consumed here; all remaining options go to the compressor
    threads = options.delete(:threads) || @config&.threads

    compressor = ParallelCompressor.new(@config, threads: threads)
    result = compressor.compress(dir, output, **options)

    print_stats("Compression", compressor.statistics) if verbose?(options)

    result
  end

  # Extract archive in parallel
  #
  # @param archive [String] archive path
  # @param dest [String] destination directory
  # @param options [Hash] extraction options
  # @option options [Integer] :threads number of threads
  #   (falls back to the global config's +threads+)
  # @option options [Boolean] :overwrite overwrite existing files
  # @option options [Proc] :progress progress callback
  # @option options [Boolean] :verbose print statistics when done
  # @return [Array<String>] extracted file paths
  #
  # @example
  #   Omnizip::Parallel.extract_archive('large.zip', 'output/')
  #   Omnizip::Parallel.extract_archive('large.zip', 'output/', threads: 4)
  def extract_archive(archive, dest, **options)
    # :threads is consumed here; all remaining options go to the extractor
    threads = options.delete(:threads) || @config&.threads

    extractor = ParallelExtractor.new(@config, threads: threads)
    result = extractor.extract(archive, dest, **options)

    print_stats("Extraction", extractor.statistics) if verbose?(options)

    result
  end

  # Get default configuration
  #
  # @return [Omnizip::Models::ParallelOptions] memoized default options,
  #   kept separately from the object stored in +config+
  def default_config
    @default_config ||= Omnizip::Models::ParallelOptions.new
  end

  # Reset configuration to defaults
  #
  # @return [Omnizip::Models::ParallelOptions] the fresh configuration
  def reset_config
    @config = Omnizip::Models::ParallelOptions.new
  end

  private

  # Whether statistics should be printed for this call
  #
  # @param options [Hash] per-call options
  # @return [Boolean, nil] truthy when the call or the global config is verbose
  def verbose?(options)
    options[:verbose] || @config&.verbose
  end

  # Print statistics
  #
  # Missing numeric entries are printed as 0.00 instead of raising, in
  # line with the 0 fallback already used for the file count.
  #
  # @param operation [String] operation name
  # @param stats [Hash] statistics hash
  # @return [void]
  def print_stats(operation, stats)
    files = stats[:files_processed] || stats[:files_extracted] || 0
    duration = format("%.2f", stats[:duration].to_f)
    throughput = format("%.2f", stats[:throughput_mbps].to_f)

    puts "\n=== #{operation} Statistics ==="
    puts "Files processed: #{files}"
    puts "Duration: #{duration}s"
    puts "Throughput: #{throughput} MB/s"

    if stats[:compression_ratio]
      ratio = format("%.2f", stats[:compression_ratio])
      puts "Compression ratio: #{ratio}%"
    end

    puts "================================\n"
  end
end
145
+
146
+ # Initialize default configuration
147
+ configure {}
148
+ end
149
+ end
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "reed_solomon_matrix"
4
+
5
+ module Omnizip
6
+ module Parity
7
+ # Chunked block processor for incremental Reed-Solomon recovery
8
+ #
9
+ # Implements correct RS decoding: x = A^-1 * b
10
+ # where b[i] = recovery[i] - sum(present[k] * base[present[k]]^exponent[i])
11
+ #
12
+ # Processes large blocks incrementally in memory-efficient chunks.
13
class ChunkedBlockProcessor
  # Default chunk size (1MB)
  DEFAULT_CHUNK_SIZE = 1024 * 1024

  # Bytes per GF(2^16) word; chunk sizes are kept to a multiple of this
  WORD_SIZE = 2

  # @return [ReedSolomonMatrix] RS matrix with precomputed coefficients
  attr_reader :matrix

  # @return [Hash<Integer, String>] Present input blocks (index => data)
  attr_reader :present_blocks

  # @return [Hash<Integer, String>] Recovery blocks (exponent => data)
  attr_reader :recovery_blocks

  # @return [Array<Integer>] Missing block indices to recover (sorted)
  attr_reader :missing_indices

  # @return [Integer] Block size in bytes
  attr_reader :block_size

  # @return [Integer] Chunk size for processing (even, at least one word)
  attr_reader :chunk_size

  # Initialize processor
  #
  # @param matrix [ReedSolomonMatrix] Precomputed RS matrix (A^-1)
  # @param present_blocks [Hash<Integer, String>] Present data blocks
  # @param recovery_blocks [Hash<Integer, String>] Recovery blocks (by exponent)
  # @param missing_indices [Array<Integer>] Indices to recover
  # @param block_size [Integer] Block size in bytes
  # @param chunk_size [Integer] Chunk size for processing
  def initialize(matrix, present_blocks, recovery_blocks, missing_indices,
                 block_size, chunk_size: DEFAULT_CHUNK_SIZE)
    @matrix = matrix
    @present_blocks = present_blocks
    @recovery_blocks = recovery_blocks
    @missing_indices = missing_indices.sort
    @block_size = block_size

    # Round down to a whole number of 16-bit words, but never below one
    # word: a zero-length chunk would keep process_all from advancing.
    requested_chunk = [chunk_size, block_size].min
    even_chunk = requested_chunk - (requested_chunk % WORD_SIZE)
    @chunk_size = [even_chunk, WORD_SIZE].max
  end

  # Process all blocks incrementally
  #
  # Implements: x = A^-1 * b
  # where b = recovery - present_contributions
  #
  # @return [Hash<Integer, String>] Recovered blocks (missing index => data)
  def process_all
    # Every output block starts zero-filled; chunks are accumulated into it
    recovered = missing_indices.each_with_object({}) do |idx, blocks|
      blocks[idx] = "\x00".b * block_size
    end

    block_offset = 0
    while block_offset < block_size
      # The final chunk may be shorter than chunk_size
      current_chunk_size = [chunk_size, block_size - block_offset].min
      process_chunk_at_offset(recovered, block_offset, current_chunk_size)
      block_offset += current_chunk_size
    end

    recovered
  end

  private

  # Process one chunk at given offset
  #
  # For each chunk, we:
  # 1. Build the b vector: recovery chunks with the present block
  #    contributions already subtracted
  # 2. Apply the A^-1 matrix to b to get the recovered chunks
  #
  # @param recovered [Hash<Integer, String>] Recovered blocks being built
  # @param offset [Integer] Current offset within blocks
  # @param length [Integer] Chunk length
  # @return [void]
  def process_chunk_at_offset(recovered, offset, length)
    b_vector = compute_b_vector_chunks(offset, length)

    # For each missing block j: recovered[j] = sum(A^-1[j,i] * b[i])
    apply_inverse_matrix(recovered, b_vector, offset, length)
  end

  # Compute b vector chunks
  #
  # b[i] = recovery[i] - sum(present[k] * base[present[k]]^exponent[i])
  #
  # @param offset [Integer] Offset within blocks
  # @param length [Integer] Chunk length
  # @return [Array<String>] B vector chunks (one per recovery block used)
  def compute_b_vector_chunks(offset, length)
    # Order follows matrix.used_recovery_exponents so that position i in
    # the result lines up with column i used in apply_inverse_matrix.
    matrix.used_recovery_exponents.map do |exponent|
      # Start from a private copy of the recovery block chunk
      b_chunk = recovery_blocks[exponent][offset, length].dup

      present_blocks.each do |present_idx, present_data|
        coefficient = matrix.present_contribution_coefficient(present_idx,
                                                              exponent)
        next if coefficient.zero?

        # b -= present * coefficient (subtraction is XOR in GF(2^16))
        subtract_contribution(b_chunk, present_data[offset, length],
                              coefficient, length)
      end

      b_chunk
    end
  end

  # Subtract present block contribution from b chunk
  #
  # b_chunk ^= present_chunk * coefficient (GF subtraction is XOR)
  #
  # Data is handled as little-endian 16-bit words; a trailing odd byte
  # (length not a multiple of 2) is left untouched.
  #
  # @param b_chunk [String] B vector chunk (modified in place)
  # @param present_chunk [String] Present block chunk
  # @param coefficient [Integer] Galois field coefficient
  # @param length [Integer] Chunk length
  # @return [void]
  def subtract_contribution(b_chunk, present_chunk, coefficient, length)
    (length / WORD_SIZE).times do |word_index|
      low = word_index * WORD_SIZE
      high = low + 1

      present_word = present_chunk.getbyte(low) |
        (present_chunk.getbyte(high) << 8)
      b_word = b_chunk.getbyte(low) | (b_chunk.getbyte(high) << 8)

      contribution = Galois16.multiply(present_word, coefficient)
      result = Galois16.add(b_word, contribution) # add is XOR in GF(2^16)

      b_chunk.setbyte(low, result & 0xFF)
      b_chunk.setbyte(high, (result >> 8) & 0xFF)
    end
  end

  # Apply inverse matrix to b vector
  #
  # For each missing block j:
  #   recovered[j] += sum_i(A^-1[j,i] * b[i])
  #
  # @param recovered [Hash<Integer, String>] Recovered blocks being built
  # @param b_vector [Array<String>] B vector chunks
  # @param offset [Integer] Offset within blocks
  # @param length [Integer] Chunk length
  # @return [void]
  def apply_inverse_matrix(recovered, b_vector, offset, length)
    # Row index = position of the missing block in the sorted index list
    missing_indices.each_with_index do |missing_idx, output_idx|
      output_block = recovered[missing_idx]

      # Column index = position of the recovery block in the b vector
      b_vector.each_with_index do |b_chunk, recovery_idx|
        coefficient = matrix.coefficient(output_idx, recovery_idx)
        next if coefficient.zero?

        # The chunk must be accumulated at this chunk's offset inside the
        # full-size output block, not at its start.
        matrix.process_chunk(coefficient, b_chunk, output_block, length,
                             output_offset: offset)
      end
    end
  end
end
195
+ end
196
+ end
@@ -0,0 +1,145 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Parity
5
+ # Pure implementation of Galois Field GF(2^16) arithmetic
6
+ # Uses generator polynomial 0x1100B (69643) as per PAR2 specification
7
+ #
8
+ # This is pure algorithm code with no I/O dependencies.
9
+ # All operations are exact (no floating point) and work in GF(2^16).
10
class Galois16
  BITS = 16
  GENERATOR = 0x1100B # 69643 in decimal
  FIELD_SIZE = 1 << BITS # 65536
  LIMIT = FIELD_SIZE - 1 # 65535

  # Lookup tables for efficient operations (filled by build_tables)
  @log_table = nil
  @antilog_table = nil

  class << self
    # Build log and antilog tables
    #
    # Called once when the class is loaded; later calls return early.
    # Successive powers of 2 are generated by shifting left and XOR-ing
    # with GENERATOR whenever bit 16 becomes set.
    #
    # @return [void]
    def build_tables
      return if @log_table && @antilog_table

      log_table = Array.new(FIELD_SIZE)
      antilog_table = Array.new(FIELD_SIZE)

      value = 1
      LIMIT.times do |exponent|
        log_table[value] = exponent
        antilog_table[exponent] = value

        value <<= 1
        value ^= GENERATOR if value.anybits?(FIELD_SIZE)
      end

      # Special cases for zero: log(0) is mapped to LIMIT and back
      log_table[0] = LIMIT
      antilog_table[LIMIT] = 0

      # Publish only fully built, read-only tables
      @log_table = log_table.freeze
      @antilog_table = antilog_table.freeze
    end

    # Get log value (discrete logarithm)
    #
    # @param value [Integer] field element (only the low 16 bits are used)
    # @return [Integer] table entry; LIMIT for 0
    def log(value)
      @log_table[value & 0xFFFF]
    end

    # Get antilog value (inverse of log)
    #
    # @param value [Integer] exponent, taken modulo FIELD_SIZE as a table index
    # @return [Integer] table entry; 0 for LIMIT
    def antilog(value)
      @antilog_table[value % FIELD_SIZE]
    end

    # Addition in GF(2^16) - same as subtraction (XOR)
    #
    # @param a [Integer]
    # @param b [Integer]
    # @return [Integer] 16-bit sum
    def add(a, b)
      (a ^ b) & 0xFFFF
    end

    # Subtraction in GF(2^16) - same as addition (XOR)
    alias subtract add

    # Multiplication in GF(2^16)
    #
    # @param a [Integer]
    # @param b [Integer]
    # @return [Integer] product; 0 when either operand is 0
    def multiply(a, b)
      a &= 0xFFFF
      b &= 0xFFFF

      return 0 if a.zero? || b.zero?

      # Both logs are below LIMIT, so one subtraction is a full reduction
      sum = @log_table[a] + @log_table[b]
      sum -= LIMIT if sum >= LIMIT
      @antilog_table[sum]
    end

    # Division in GF(2^16)
    #
    # @param a [Integer] dividend
    # @param b [Integer] divisor
    # @return [Integer] quotient; 0 when the dividend is 0
    # @raise [ArgumentError] when a is non-zero and b is 0
    def divide(a, b)
      a &= 0xFFFF
      b &= 0xFFFF

      return 0 if a.zero?
      raise ArgumentError, "Division by zero in GF(2^16)" if b.zero?

      diff = @log_table[a] - @log_table[b]
      diff += LIMIT if diff.negative?
      @antilog_table[diff]
    end

    # Power in GF(2^16): compute a^n
    #
    # The exponent is reduced with an exact modulo (the non-zero
    # elements have LIMIT distinct logs), so the result is correct for
    # arbitrarily large n rather than only for n that fit in 16 bits.
    #
    # @param a [Integer] base (only the low 16 bits are used)
    # @param n [Integer] exponent
    # @return [Integer] a^n; 1 when n is 0, otherwise 0 when a is 0
    def power(a, n)
      a &= 0xFFFF

      return 1 if n.zero?
      return 0 if a.zero?

      @antilog_table[(@log_table[a] * n) % LIMIT]
    end

    # Compute the greatest common divisor of two integers
    #
    # @param a [Integer]
    # @param b [Integer]
    # @return [Integer]
    def gcd(a, b)
      a.gcd(b)
    end

    # Select base values for Reed-Solomon matrix
    #
    # Returns sequential powers of the generator element:
    # base[0] = antilog[0] = 1, base[1] = antilog[1] = 2, base[2] = 4, ...
    #
    # NOTE(review): the PAR2 specification describes its input constants
    # as powers of two whose exponent is coprime to 65535 (2, 4, 16, ...);
    # this method does not skip any exponent - confirm that callers
    # really expect the sequential form.
    #
    # @param count [Integer] number of bases
    # @return [Array<Integer>] the first +count+ bases
    # @raise [ArgumentError] when count >= LIMIT
    def select_bases(count)
      raise ArgumentError, "Too many bases requested" if count >= LIMIT

      (0...count).map { |logbase| @antilog_table[logbase] }
    end
  end

  # Build tables when class is loaded
  build_tables
end
144
+ end
145
+ end