omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
module Omnizip
  module Models
    # Configuration for split archive (multi-volume) creation.
    #
    # Holds the per-volume size, the volume naming pattern and the span
    # strategy, and offers helpers to parse human-readable sizes and to
    # build volume filenames.
    class SplitOptions
      attr_accessor :volume_size, :naming_pattern, :span_strategy

      # Naming pattern types
      NAMING_NUMERIC = :numeric # .001, .002, .003
      NAMING_ALPHA = :alpha # .aa, .ab, .ac

      # Span strategies
      STRATEGY_FIRST_FIT = :first_fit # Fill volumes sequentially
      STRATEGY_BALANCED = :balanced # Balance files across volumes

      # Default volume size (100 MB)
      DEFAULT_VOLUME_SIZE = 100 * 1024 * 1024

      # Binary multipliers keyed by the (upcased) unit letter.
      UNIT_MULTIPLIERS = {
        "K" => 1024,
        "M" => 1024**2,
        "G" => 1024**3,
        "T" => 1024**4,
      }.freeze

      # Trailing "<number><unit>" expression of an upcased size string.
      # Accepts "K", "KB" and "KIB" style suffixes; \z (not $) so only the
      # very end of the string is considered.
      SIZE_PATTERN = /(\d+(?:\.\d+)?)\s*([KMGT])(?:I?B)?\z/

      # Alphabet size and minimum width used for alpha volume suffixes.
      ALPHABET_SIZE = 26
      MIN_ALPHA_WIDTH = 2

      private_constant :UNIT_MULTIPLIERS, :SIZE_PATTERN,
                       :ALPHABET_SIZE, :MIN_ALPHA_WIDTH

      # Initialize with default options
      def initialize
        @volume_size = DEFAULT_VOLUME_SIZE
        @naming_pattern = NAMING_NUMERIC
        @span_strategy = STRATEGY_FIRST_FIT
      end

      # Parse volume size from string (e.g., "100M", "4.7G", "100MiB")
      #
      # Integers are returned unchanged. Any other value is converted with
      # #to_s, stripped and upcased. When it ends in a number followed by a
      # K/M/G/T unit (optionally "B" or "iB"), the number is multiplied by
      # the matching power of 1024 and truncated. Otherwise the result of
      # String#to_i is returned (0 for text without a leading number).
      #
      # @param size_str [String, Integer] Size string with unit
      # @return [Integer] Size in bytes
      def self.parse_volume_size(size_str)
        return size_str if size_str.is_a?(Integer)

        text = size_str.to_s.strip.upcase
        match = SIZE_PATTERN.match(text)
        return text.to_i unless match

        (match[1].to_f * UNIT_MULTIPLIERS.fetch(match[2])).to_i
      end

      # Generate volume filename
      #
      # A trailing three-digit suffix (".001") is always removed from
      # +base_path+ first; with alpha naming a trailing all-lowercase
      # suffix of two or more letters is removed as well. Any naming
      # pattern other than NAMING_ALPHA produces the numeric form.
      #
      # NOTE(review): with alpha naming the second strip also removes a
      # genuine extension such as ".zip" or ".tar" ("backup.zip" becomes
      # "backup.aa"). Left unchanged here; confirm the intended behavior.
      #
      # @param base_path [String] Base archive path (e.g., "backup.7z.001")
      # @param volume_number [Integer] Volume number (1-based)
      # @return [String] Volume filename
      # @raise [ArgumentError] if alpha naming is used and volume_number
      #   is not a positive integer
      def volume_filename(base_path, volume_number)
        alpha = @naming_pattern == NAMING_ALPHA

        base = base_path.sub(/\.\d{3}$/, "")
        base = base.sub(/\.[a-z]{2,}$/, "") if alpha

        if alpha
          format("%s.%s", base, alpha_suffix(volume_number))
        else
          format("%s.%03d", base, volume_number)
        end
      end

      # Validate options
      #
      # @return [true] when every option is valid
      # @raise [ArgumentError] if options are invalid
      def validate!
        # Guard the type first so nil or a String reports ArgumentError
        # instead of failing with NoMethodError on #positive?.
        unless @volume_size.is_a?(Numeric) && @volume_size.positive?
          raise ArgumentError, "volume_size must be positive"
        end

        valid_patterns = [NAMING_NUMERIC, NAMING_ALPHA]
        unless valid_patterns.include?(@naming_pattern)
          raise ArgumentError,
                "naming_pattern must be one of #{valid_patterns.inspect}"
        end

        valid_strategies = [STRATEGY_FIRST_FIT, STRATEGY_BALANCED]
        unless valid_strategies.include?(@span_strategy)
          raise ArgumentError,
                "span_strategy must be one of #{valid_strategies.inspect}"
        end

        true
      end

      private

      # Generate alpha suffix for volume number
      #
      # Volumes 1..676 map to the two-letter suffixes aa..zz. Once a width
      # is exhausted the suffix grows by one letter and restarts
      # (677 -> aaa, 18253 -> aaaa), so only lowercase letters are emitted.
      #
      # @param volume_number [Integer] Volume number (1-based)
      # @return [String] Alpha suffix (aa, ab, ..., az, ba, ..., zz, aaa, ...)
      # @raise [ArgumentError] if volume_number is not a positive integer
      def alpha_suffix(volume_number)
        unless volume_number.is_a?(Integer) && volume_number.positive?
          raise ArgumentError, "volume_number must be a positive integer"
        end

        index = volume_number - 1 # Convert to 0-based
        width = MIN_ALPHA_WIDTH
        capacity = ALPHABET_SIZE**width

        # Skip past every suffix width that is already fully used.
        while index >= capacity
          index -= capacity
          width += 1
          capacity *= ALPHABET_SIZE
        end

        # Emit base-26 digits, least significant first, then reverse.
        letters = Array.new(width) do
          index, offset = index.divmod(ALPHABET_SIZE)
          ("a".ord + offset).chr
        end
        letters.reverse.join
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ # Registry for performance optimization strategies using the Registry pattern
5
class OptimizationRegistry
  # Base class for optimization strategies.
  #
  # Subclasses receive their options as keyword arguments and are expected
  # to override #optimize.
  class Strategy
    attr_reader :options

    # Metadata returned for strategies that do not supply their own.
    #
    # @return [Hash] name, description, category and impact
    def self.metadata
      {
        name: name,
        description: "No description provided",
        category: :general,
        impact: :unknown,
      }
    end

    # @param options [Hash] keyword options, kept as given
    def initialize(**options)
      @options = options
    end

    # Placeholder that subclasses must replace.
    #
    # @param target [Object] object to optimize
    # @raise [NotImplementedError] always, in the base class
    def optimize(target)
      raise NotImplementedError, "#{self.class} must implement #optimize"
    end
  end

  # Backing store mapping strategy names to strategy classes.
  #
  # @return [Hash] created lazily on first access
  def self.strategies
    @strategies ||= {}
  end

  # Store a strategy class under the given name.
  #
  # @param name [Object] lookup key
  # @param strategy_class [Class] class to instantiate in .apply
  # @return [Class] the stored strategy class
  def self.register(name, strategy_class)
    strategies[name] = strategy_class
  end

  # Look up a strategy class.
  #
  # @param name [Object] lookup key
  # @return [Class] the registered strategy class
  # @raise [Omnizip::OptimizationNotFound] when nothing truthy is stored
  #   under the name
  def self.get(name)
    found = strategies[name]
    return found if found

    raise Omnizip::OptimizationNotFound,
          "Optimization strategy not found: #{name}"
  end

  # @param name [Object] lookup key
  # @return [Boolean] whether the name has been registered
  def self.registered?(name)
    strategies.key?(name)
  end

  # @return [Array] names of all registered strategies
  def self.all
    strategies.keys
  end

  # Replace the store with an empty hash (useful for testing).
  #
  # @return [Hash] the new, empty store
  def self.clear!
    @strategies = {}
  end

  # Instantiate the named strategy with the options and run it on the target.
  #
  # @param name [Object] lookup key
  # @param target [Object] passed to the strategy's #optimize
  # @param options [Hash] keyword options for the strategy constructor
  # @return [Object] whatever the strategy's #optimize returns
  def self.apply(name, target, **options)
    get(name).new(**options).optimize(target)
  end

  # Metadata reported by the named strategy class.
  #
  # @param name [Object] lookup key
  # @return [Hash] the class's metadata, or an empty hash when the class
  #   does not respond to +metadata+
  def self.metadata(name)
    strategy_class = get(name)
    strategy_class.respond_to?(:metadata) ? strategy_class.metadata : {}
  end
end
81
+ end
@@ -0,0 +1,209 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Parallel
5
+ # Thread-safe job queue for parallel compression/extraction
6
+ #
7
+ # Manages a queue of compression or extraction jobs with priority support.
8
+ # Jobs are ordered by priority (large files first for better load balancing).
9
+ #
10
+ # @example Create and use job queue
11
+ # queue = Omnizip::Parallel::JobQueue.new(max_size: 100)
12
+ # queue.push(file: 'large.dat', size: 1_000_000, priority: :high)
13
+ # job = queue.pop
14
+ #
15
+ # @example Size-based priority
16
+ # queue.push_with_size(file: 'file.txt', size: 1024)
17
class JobQueue
  # Sort rank for each recognised priority; lower ranks are dequeued first.
  PRIORITY_RANK = { high: 0, normal: 1, low: 2 }.freeze

  # Rank used for a priority that is not a key of PRIORITY_RANK
  # (the same rank as :normal).
  FALLBACK_RANK = 1

  # Size thresholds, in bytes, used by #push_with_size.
  HIGH_PRIORITY_BYTES = 10 * 1024 * 1024
  NORMAL_PRIORITY_BYTES = 1024 * 1024

  # Job structure for queue items
  Job = Struct.new(:file, :data, :size, :priority, :metadata,
                   keyword_init: true) do
    # Order jobs by priority rank first, then by size with larger first.
    #
    # @param other [Job] job to compare against
    # @return [Integer] negative when this job should be dequeued first
    def <=>(other)
      by_rank = PRIORITY_RANK.fetch(priority, FALLBACK_RANK) <=>
                PRIORITY_RANK.fetch(other.priority, FALLBACK_RANK)
      return by_rank unless by_rank.zero?

      # Same rank: negate so the larger size sorts first.
      -(size <=> other.size)
    end
  end

  # @return [Integer] maximum queue size
  attr_reader :max_size

  # @return [Integer] current queue size
  attr_reader :size

  # Initialize job queue
  #
  # @param max_size [Integer] maximum number of jobs in queue
  def initialize(max_size: 1000)
    @max_size = max_size
    @queue = []
    @mutex = Mutex.new
    # Producers and consumers wait on separate condition variables so a
    # signal meant for one side can never be consumed by the other.
    @not_empty = ConditionVariable.new
    @not_full = ConditionVariable.new
    @closed = false
    @size = 0
  end

  # Push a job onto the queue, blocking while the queue is full.
  #
  # @param file [String] file path
  # @param data [Object] job data
  # @param size [Integer] file size in bytes
  # @param priority [Symbol] job priority (:high, :normal, :low)
  # @param metadata [Hash] additional metadata
  # @raise [ClosedQueueError] if queue is closed, including when it is
  #   closed while this call is waiting for space
  # @return [Job] the created job
  def push(file:, data: nil, size: 0, priority: :normal, metadata: {})
    @mutex.synchronize do
      raise ClosedQueueError, "Queue is closed" if @closed

      # Wait if queue is full
      @not_full.wait(@mutex) while @size >= @max_size && !@closed

      raise ClosedQueueError, "Queue is closed" if @closed

      job = Job.new(
        file: file,
        data: data,
        size: size,
        priority: priority,
        metadata: metadata,
      )

      insert_sorted(job)
      @size += 1

      @not_empty.signal # Wake one waiting consumer
      job
    end
  end

  # Push a job with automatic priority based on file size
  #
  # @param file [String] file path
  # @param size [Integer] file size in bytes
  # @param data [Object] job data
  # @param metadata [Hash] additional metadata
  # @return [Job] the created job
  def push_with_size(file:, size:, data: nil, metadata: {})
    # Large files (>10MB) get high priority for better load balancing
    priority = if size > HIGH_PRIORITY_BYTES
                 :high
               elsif size > NORMAL_PRIORITY_BYTES
                 :normal
               else
                 :low
               end

    push(file: file, data: data, size: size, priority: priority,
         metadata: metadata)
  end

  # Pop the first job in queue order, blocking while the queue is empty.
  #
  # Jobs still queued when the queue is closed are returned by later pops
  # until the queue is drained.
  #
  # @param timeout [Numeric, nil] timeout in seconds, nil for no timeout
  # @return [Job, nil] job, or nil on timeout or when the queue is closed
  #   and empty
  def pop(timeout: nil)
    @mutex.synchronize do
      if timeout
        # Monotonic clock: the deadline is unaffected by wall-clock changes.
        deadline = monotonic_now + timeout
        while @queue.empty? && !@closed
          remaining = deadline - monotonic_now
          return nil if remaining <= 0

          @not_empty.wait(@mutex, remaining)
        end
      else
        @not_empty.wait(@mutex) while @queue.empty? && !@closed
      end

      return nil if @queue.empty?

      job = @queue.shift
      @size -= 1
      @not_full.signal # Wake one waiting producer
      job
    end
  end

  # Pop multiple jobs in batch
  #
  # Each individual pop uses the given timeout, and collection stops at
  # the first pop that returns nil.
  #
  # @param count [Integer] maximum number of jobs to pop
  # @param timeout [Numeric, nil] timeout in seconds, applied per pop
  # @return [Array<Job>] array of jobs (may be empty)
  def pop_batch(count, timeout: nil)
    jobs = []
    count.times do
      job = pop(timeout: timeout)
      break unless job

      jobs << job
    end
    jobs
  end

  # Check if queue is empty
  #
  # @return [Boolean] true if empty
  def empty?
    @mutex.synchronize { @queue.empty? }
  end

  # Check if queue is closed
  #
  # @return [Boolean] true if closed
  def closed?
    @mutex.synchronize { @closed }
  end

  # Close the queue
  #
  # No more jobs can be pushed after closing.
  # Waiting pops return nil once the queue is empty.
  def close
    @mutex.synchronize do
      @closed = true
      # Wake every waiting thread on both sides so each can see the close.
      @not_empty.broadcast
      @not_full.broadcast
    end
  end

  # Clear all jobs from queue
  #
  # @return [Integer] number of jobs cleared
  def clear
    @mutex.synchronize do
      count = @queue.size
      @queue.clear
      @size = 0
      @not_full.broadcast # Space is available again for waiting producers
      count
    end
  end

  # Get queue statistics
  #
  # @return [Hash] size, max_size, closed, utilization and priority_counts
  def stats
    @mutex.synchronize do
      {
        size: @size,
        max_size: @max_size,
        closed: @closed,
        utilization: @max_size.zero? ? 0.0 : @size.to_f / @max_size,
        priority_counts: @queue.group_by(&:priority).transform_values(&:count),
      }
    end
  end

  private

  # Insert the job so the queue stays sorted, placing it after any queued
  # jobs that compare equal so that ties are served first-in, first-out.
  # Must be called with the mutex held.
  def insert_sorted(job)
    position = @queue.bsearch_index { |queued| (job <=> queued).negative? }
    @queue.insert(position || @queue.size, job)
  end

  # Current monotonic clock reading in seconds.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
205
+
206
+ # Exception raised when trying to push to a closed queue
207
# Raised by JobQueue#push once the queue has been closed.
ClosedQueueError = Class.new(StandardError)
208
+ end
209
+ end
@@ -0,0 +1,203 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Parallel
5
+ # Job scheduler for load balancing and work distribution
6
+ #
7
+ # Manages job assignment to workers using different strategies:
8
+ # - Dynamic: Workers pull jobs as they become available (default)
9
+ # - Static: Pre-assign equal chunks to each worker
10
+ #
11
+ # @example Create scheduler with dynamic strategy
12
+ # scheduler = Omnizip::Parallel::JobScheduler.new(strategy: :dynamic)
13
+ # scheduler.schedule_jobs(jobs, worker_count: 4)
14
+ #
15
+ # @example Create scheduler with static strategy
16
+ # scheduler = Omnizip::Parallel::JobScheduler.new(strategy: :static)
17
+ # assignments = scheduler.schedule_jobs(jobs, worker_count: 4)
18
class JobScheduler
  # @return [Symbol] scheduling strategy
  attr_reader :strategy

  # Initialize job scheduler
  #
  # @param strategy [Symbol] :dynamic or :static
  # @raise [ArgumentError] if the strategy is neither :dynamic nor :static
  def initialize(strategy: :dynamic)
    @strategy = strategy
    validate_strategy!
  end

  # Schedule jobs for workers
  #
  # @param jobs [Array] array of jobs to schedule
  # @param worker_count [Integer] number of workers
  # @return [Hash] scheduling metadata for :dynamic; for :static, worker
  #   index => jobs plus a :metadata entry (empty hash when there are no
  #   jobs or no workers)
  def schedule_jobs(jobs, worker_count:)
    case @strategy
    when :dynamic
      schedule_dynamic(jobs, worker_count)
    when :static
      schedule_static(jobs, worker_count)
    end
  end

  # Estimate completion time based on job sizes and worker count
  #
  # Jobs that do not respond to +size+ count as zero bytes.
  #
  # @param jobs [Array] array of jobs with :size attribute
  # @param worker_count [Integer] number of workers
  # @param bytes_per_second [Float] processing rate per worker
  # @return [Float] estimated seconds to completion
  def estimate_completion_time(jobs, worker_count:,
                               bytes_per_second: 10_000_000)
    total_bytes = jobs.sum { |job| job_size(job, 0) }
    return 0.0 if total_bytes.zero? || worker_count.zero?

    # Simple estimate: total bytes / (workers * rate)
    total_bytes.to_f / (worker_count * bytes_per_second)
  end

  # Calculate load balance quality metric
  #
  # A :metadata entry, as added by static scheduling, is ignored so the
  # hash returned by #schedule_jobs can be passed in directly.
  #
  # @param assignments [Hash] worker_id => [jobs] mapping
  # @return [Float] balance score (0.0 = perfect, 1.0 = worst)
  def calculate_load_balance(assignments)
    per_worker = assignments.reject { |worker_id, _| worker_id == :metadata }

    # Total size per worker; jobs without a size count as one unit each.
    loads = per_worker.values.map do |jobs|
      jobs.sum { |job| job_size(job, 1) }
    end
    return 0.0 if loads.empty?

    heaviest = loads.max
    return 0.0 if heaviest.zero?

    # Balance = (max - min) / max
    (heaviest - loads.min).to_f / heaviest
  end

  private

  # Validate scheduling strategy
  #
  # @raise [ArgumentError] if strategy is invalid
  def validate_strategy!
    valid_strategies = %i[dynamic static]
    return if valid_strategies.include?(@strategy)

    raise ArgumentError,
          "Invalid strategy: #{@strategy}. Must be one of: #{valid_strategies.join(', ')}"
  end

  # Dynamic scheduling: nothing is pre-assigned; the jobs are returned as
  # a single queue together with summary figures.
  #
  # @param jobs [Array] jobs to schedule
  # @param worker_count [Integer] number of workers
  # @return [Hash] scheduling metadata
  def schedule_dynamic(jobs, worker_count)
    # With no workers there is no per-worker share; report 0 instead of
    # attempting to round an infinite or undefined quotient.
    per_worker = if worker_count.zero?
                   0
                 else
                   (jobs.size.to_f / worker_count).ceil
                 end

    {
      strategy: :dynamic,
      total_jobs: jobs.size,
      worker_count: worker_count,
      estimated_jobs_per_worker: per_worker,
      queue: jobs, # Jobs will be consumed from this queue
    }
  end

  # Static scheduling: pre-assign jobs to workers, largest jobs first,
  # each to the worker with the smallest load so far.
  #
  # @param jobs [Array] jobs to schedule
  # @param worker_count [Integer] number of workers
  # @return [Hash] worker_id => [jobs] mapping plus a :metadata entry
  def schedule_static(jobs, worker_count)
    return {} if jobs.empty? || worker_count.zero?

    assignments, worker_loads = assign_to_least_loaded(jobs, worker_count)

    # Score the plan before the :metadata entry is attached.
    balance_score = calculate_load_balance(assignments)

    assignments[:metadata] = {
      strategy: :static,
      total_jobs: jobs.size,
      worker_count: worker_count,
      balance_score: balance_score,
      worker_loads: worker_loads,
    }

    assignments
  end

  # Round-robin assignment (alternative simple strategy)
  #
  # @param jobs [Array] jobs to schedule
  # @param worker_count [Integer] number of workers
  # @return [Hash] worker_id => [jobs] mapping
  def schedule_round_robin(jobs, worker_count)
    return {} if jobs.empty? || worker_count.zero?

    assignments = (0...worker_count).to_h { |worker_id| [worker_id, []] }
    jobs.each_with_index do |job, index|
      assignments[index % worker_count] << job
    end
    assignments
  end

  # Size-aware assignment: largest jobs first, each to the worker with
  # the smallest load so far. Same plan as static scheduling, without
  # the :metadata entry.
  #
  # @param jobs [Array] jobs to schedule
  # @param worker_count [Integer] number of workers
  # @return [Hash] worker_id => [jobs] mapping
  def schedule_bin_packing(jobs, worker_count)
    return {} if jobs.empty? || worker_count.zero?

    assign_to_least_loaded(jobs, worker_count).first
  end

  # Shared greedy placement used by the size-aware strategies.
  #
  # @param jobs [Array] jobs to place
  # @param worker_count [Integer] number of workers
  # @return [Array(Hash, Array<Integer>)] the worker_id => [jobs] mapping
  #   and the per-worker load totals
  def assign_to_least_loaded(jobs, worker_count)
    assignments = (0...worker_count).to_h { |worker_id| [worker_id, []] }
    worker_loads = Array.new(worker_count, 0)

    # Largest first; jobs without a size sort as size 0.
    largest_first = jobs.sort_by { |job| -job_size(job, 0) }

    largest_first.each do |job|
      # The lowest-numbered worker wins when several share the minimum load.
      target = worker_loads.each_with_index.min_by { |load, _| load }.last
      assignments[target] << job
      # Jobs without a size count as one unit of load.
      worker_loads[target] += job_size(job, 1)
    end

    [assignments, worker_loads]
  end

  # Size reported by the job, or the fallback when it has no +size+ method.
  #
  # @param job [Object] job to inspect
  # @param fallback [Integer] value used when the job lacks +size+
  # @return [Integer] the job's size or the fallback
  def job_size(job, fallback)
    job.respond_to?(:size) ? job.size : fallback
  end
end
202
+ end
203
+ end