omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Models
|
|
5
|
+
# Configuration for split archive (multi-volume) creation
|
|
6
|
+
# Defines how archives should be split into volumes
|
|
7
|
+
class SplitOptions
  # @!attribute volume_size
  #   @return [Integer] maximum size of one volume, in bytes
  # @!attribute naming_pattern
  #   @return [Symbol] NAMING_NUMERIC or NAMING_ALPHA
  # @!attribute span_strategy
  #   @return [Symbol] STRATEGY_FIRST_FIT or STRATEGY_BALANCED
  attr_accessor :volume_size, :naming_pattern, :span_strategy

  # Naming pattern types
  NAMING_NUMERIC = :numeric # .001, .002, .003
  NAMING_ALPHA = :alpha # .aa, .ab, .ac

  # Span strategies
  STRATEGY_FIRST_FIT = :first_fit # Fill volumes sequentially
  STRATEGY_BALANCED = :balanced # Balance files across volumes

  # Default volume size (100 MB)
  DEFAULT_VOLUME_SIZE = 100 * 1024 * 1024

  # Binary multipliers keyed by the (upper-cased) unit letter.
  UNIT_MULTIPLIERS = {
    "K" => 1024,
    "M" => 1024**2,
    "G" => 1024**3,
    "T" => 1024**4,
  }.freeze

  # Whole-string match for "<number><unit>": optional sign, optional
  # decimal part, optional whitespace, then K/M/G/T optionally followed
  # by "B" or "IB" (input is upper-cased before matching).
  SIZE_PATTERN = /\A([+-]?\d+(?:\.\d+)?)\s*([KMGT])(?:I?B)?\z/.freeze

  private_constant :UNIT_MULTIPLIERS, :SIZE_PATTERN

  # Initialize with default options
  def initialize
    @volume_size = DEFAULT_VOLUME_SIZE
    @naming_pattern = NAMING_NUMERIC
    @span_strategy = STRATEGY_FIRST_FIT
  end

  # Parse volume size from string (e.g., "100M", "4.7G", "100 MiB")
  #
  # Integers are returned untouched. Units are binary (K = 1024) and
  # case-insensitive. The whole string must match, so a leading sign is
  # honoured (letting #validate! reject negative sizes) and leading junk
  # is not silently ignored. Anything that does not look like
  # "<number><unit>" falls back to String#to_i, which yields 0 for
  # non-numeric text.
  #
  # @param size_str [String, Integer] Size string with unit, or byte count
  # @return [Integer] Size in bytes (fractions are truncated)
  def self.parse_volume_size(size_str)
    return size_str if size_str.is_a?(Integer)

    text = size_str.to_s.strip.upcase
    match = SIZE_PATTERN.match(text)
    return text.to_i unless match

    (match[1].to_f * UNIT_MULTIPLIERS.fetch(match[2])).to_i
  end

  # Generate volume filename
  #
  # A trailing three-digit volume suffix (".001") is always removed from
  # +base_path+ before the new suffix is appended. Any naming pattern
  # other than NAMING_ALPHA is rendered numerically.
  #
  # @param base_path [String] Base archive path (e.g., "backup.7z.001")
  # @param volume_number [Integer] Volume number (1-based)
  # @return [String] Volume filename
  # @raise [ArgumentError] in alpha mode when volume_number is not a
  #   positive Integer
  def volume_filename(base_path, volume_number)
    base = base_path.sub(/\.\d{3}$/, "")
    return format("%s.%03d", base, volume_number) unless @naming_pattern == NAMING_ALPHA

    # NOTE(review): this strips ANY trailing all-lowercase extension of two
    # or more letters, so "backup.zip" becomes "backup.aa". Presumably only
    # a previous alpha volume suffix was meant - confirm before changing.
    base = base.sub(/\.[a-z]{2,}$/, "")
    format("%s.%s", base, alpha_suffix(volume_number))
  end

  # Validate options
  #
  # @return [true] when every option is valid
  # @raise [ArgumentError] if options are invalid
  def validate!
    # Check the type first so nil or a String raises ArgumentError
    # instead of NoMethodError on #positive?.
    unless @volume_size.is_a?(Numeric) && @volume_size.positive?
      raise ArgumentError, "volume_size must be positive"
    end

    valid_patterns = [NAMING_NUMERIC, NAMING_ALPHA]
    unless valid_patterns.include?(@naming_pattern)
      raise ArgumentError,
            "naming_pattern must be one of #{valid_patterns.inspect}"
    end

    valid_strategies = [STRATEGY_FIRST_FIT, STRATEGY_BALANCED]
    unless valid_strategies.include?(@span_strategy)
      raise ArgumentError,
            "span_strategy must be one of #{valid_strategies.inspect}"
    end

    true
  end

  private

  # Generate alpha suffix for volume number
  #
  # Suffixes are at least two letters wide. Once a width is exhausted
  # (676 two-letter names) the sequence restarts one letter wider, so no
  # non-letter character is ever produced.
  #
  # @param volume_number [Integer] Volume number (1-based)
  # @return [String] Alpha suffix (aa, ab, ..., az, ba, ..., zz, aaa, ...)
  # @raise [ArgumentError] if volume_number is not a positive Integer
  def alpha_suffix(volume_number)
    unless volume_number.is_a?(Integer) && volume_number >= 1
      raise ArgumentError, "volume_number must be a positive integer"
    end

    index = volume_number - 1 # Convert to 0-based
    width = 2
    capacity = 26**width

    # Skip past every width whose names are already used up.
    while index >= capacity
      index -= capacity
      width += 1
      capacity = 26**width
    end

    # Emit base-26 digits least-significant first, then reverse.
    letters = Array.new(width) do
      index, digit = index.divmod(26)
      ("a".ord + digit).chr
    end
    letters.reverse.join
  end
end
|
|
115
|
+
end
|
|
116
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
# Registry for performance optimization strategies using the Registry pattern
|
|
5
|
+
class OptimizationRegistry
  # Base class for optimization strategies. Subclasses receive their
  # keyword options through the constructor and implement #optimize.
  class Strategy
    attr_reader :options

    # @param options [Hash] arbitrary keyword options, stored as given
    def initialize(**options)
      @options = options
    end

    # Subclasses are expected to override this with the real optimization.
    #
    # @param target [Object] the object to optimize
    # @raise [NotImplementedError] always, in this base implementation
    def optimize(target)
      message = "#{self.class} must implement #optimize"
      raise NotImplementedError, message
    end

    # Default metadata; subclasses may override to describe themselves.
    #
    # @return [Hash] name, description, category and impact entries
    def self.metadata
      {
        name: name,
        description: "No description provided",
        category: :general,
        impact: :unknown,
      }
    end
  end

  # Backing store: a lazily created Hash mapping name => strategy class.
  #
  # @return [Hash]
  def self.strategies
    @strategies ||= {}
  end

  # Store +strategy_class+ under +name+, replacing any previous entry.
  #
  # @return [Object] the strategy class that was stored
  def self.register(name, strategy_class)
    strategies[name] = strategy_class
  end

  # Look up the strategy registered under +name+.
  #
  # @raise [Omnizip::OptimizationNotFound] when nothing truthy is stored
  #   under +name+
  def self.get(name)
    found = strategies[name]
    return found if found

    raise Omnizip::OptimizationNotFound,
          "Optimization strategy not found: #{name}"
  end

  # @return [Boolean] whether +name+ is a key in the registry
  def self.registered?(name)
    strategies.key?(name)
  end

  # @return [Array] every registered name
  def self.all
    strategies.keys
  end

  # Replace the registry with a fresh empty Hash (useful for testing).
  def self.clear!
    @strategies = {}
  end

  # Instantiate the strategy registered under +name+ with +options+ and
  # run it against +target+.
  #
  # @return [Object] whatever the strategy's #optimize returns
  def self.apply(name, target, **options)
    get(name).new(**options).optimize(target)
  end

  # Metadata for the strategy registered under +name+; an empty Hash when
  # the registered object has no .metadata method.
  #
  # @return [Hash]
  def self.metadata(name)
    strategy_class = get(name)
    strategy_class.respond_to?(:metadata) ? strategy_class.metadata : {}
  end
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Parallel
|
|
5
|
+
# Thread-safe job queue for parallel compression/extraction
|
|
6
|
+
#
|
|
7
|
+
# Manages a queue of compression or extraction jobs with priority support.
|
|
8
|
+
# Jobs are ordered by priority (large files first for better load balancing).
|
|
9
|
+
#
|
|
10
|
+
# @example Create and use job queue
|
|
11
|
+
# queue = Omnizip::Parallel::JobQueue.new(max_size: 100)
|
|
12
|
+
# queue.push(file: 'large.dat', size: 1_000_000, priority: :high)
|
|
13
|
+
# job = queue.pop
|
|
14
|
+
#
|
|
15
|
+
# @example Size-based priority
|
|
16
|
+
# queue.push_with_size(file: 'file.txt', size: 1024)
|
|
17
|
+
class JobQueue
  # Sort rank per priority; a lower rank is popped first. Priorities that
  # are not listed here are ranked like :normal (rank 1).
  PRIORITY_ORDER = { high: 0, normal: 1, low: 2 }.freeze

  # Job structure for queue items
  Job = Struct.new(:file, :data, :size, :priority, :metadata,
                   keyword_init: true) do
    # Order jobs for the queue: higher priority first, then larger size
    # first within the same priority.
    #
    # @param other [Job] job to compare against
    # @return [Integer, nil] -1/0/1, or nil when the sizes are not comparable
    def <=>(other)
      priority_cmp = (PRIORITY_ORDER[priority] || 1) <=>
                     (PRIORITY_ORDER[other.priority] || 1)
      return priority_cmp unless priority_cmp.zero?

      # Operands are swapped so that the larger size sorts first
      other.size <=> size
    end
  end

  # @return [Integer] maximum queue size
  attr_reader :max_size

  # Initialize job queue
  #
  # @param max_size [Integer] maximum number of jobs in queue
  def initialize(max_size: 1000)
    @max_size = max_size
    @queue = []
    @mutex = Mutex.new
    # Producers and consumers wait on separate condition variables so that
    # a signal meant for one kind of waiter can never be consumed by the
    # other kind and lost.
    @not_empty = ConditionVariable.new
    @not_full = ConditionVariable.new
    @closed = false
  end

  # @return [Integer] current queue size
  def size
    @mutex.synchronize { @queue.size }
  end

  # Push a job onto the queue
  #
  # Blocks while the queue holds max_size jobs, until a slot frees up or
  # the queue is closed.
  #
  # @param file [String] file path
  # @param data [Object] job data
  # @param size [Integer] file size in bytes
  # @param priority [Symbol] job priority (:high, :normal, :low)
  # @param metadata [Hash] additional metadata
  # @raise [ClosedQueueError] if queue is closed
  # @return [Job] the created job
  def push(file:, data: nil, size: 0, priority: :normal, metadata: {})
    @mutex.synchronize do
      raise ClosedQueueError, "Queue is closed" if @closed

      # Wait if queue is full
      @not_full.wait(@mutex) while @queue.size >= @max_size && !@closed

      # The queue may have been closed while this thread was waiting
      raise ClosedQueueError, "Queue is closed" if @closed

      job = Job.new(
        file: file,
        data: data,
        size: size,
        priority: priority,
        metadata: metadata,
      )

      insert_sorted(job)

      @not_empty.signal # Wake one waiting consumer
      job
    end
  end

  # Push a job with automatic priority based on file size
  #
  # @param file [String] file path
  # @param size [Integer] file size in bytes
  # @param data [Object] job data
  # @param metadata [Hash] additional metadata
  # @return [Job] the created job
  def push_with_size(file:, size:, data: nil, metadata: {})
    # Large files (>10MB) get high priority for better load balancing,
    # files over 1MB get normal priority, everything else low.
    priority = if size > 10 * 1024 * 1024
                 :high
               elsif size > 1024 * 1024
                 :normal
               else
                 :low
               end

    push(file: file, data: data, size: size, priority: priority,
         metadata: metadata)
  end

  # Pop a job from the queue
  #
  # Blocks while the queue is empty and open. Jobs still queued when the
  # queue is closed can still be popped.
  #
  # @param timeout [Numeric, nil] timeout in seconds, nil for no timeout
  # @return [Job, nil] job or nil if timeout or closed
  def pop(timeout: nil)
    @mutex.synchronize do
      # A monotonic clock keeps the deadline correct even if the wall
      # clock is adjusted while waiting.
      deadline = timeout && (monotonic_now + timeout)

      while @queue.empty? && !@closed
        if deadline
          remaining = deadline - monotonic_now
          return nil if remaining <= 0

          @not_empty.wait(@mutex, remaining)
        else
          @not_empty.wait(@mutex)
        end
      end

      return nil if @queue.empty?

      job = @queue.shift
      @not_full.signal # Wake one waiting producer
      job
    end
  end

  # Pop multiple jobs in batch
  #
  # The timeout applies to each individual pop, not to the whole batch.
  #
  # @param count [Integer] maximum number of jobs to pop
  # @param timeout [Numeric, nil] timeout in seconds
  # @return [Array<Job>] array of jobs (may be empty)
  def pop_batch(count, timeout: nil)
    jobs = []
    count.times do
      job = pop(timeout: timeout)
      break unless job

      jobs << job
    end
    jobs
  end

  # Check if queue is empty
  #
  # @return [Boolean] true if empty
  def empty?
    @mutex.synchronize { @queue.empty? }
  end

  # Check if queue is closed
  #
  # @return [Boolean] true if closed
  def closed?
    @mutex.synchronize { @closed }
  end

  # Close the queue
  #
  # No more jobs can be pushed after closing.
  # Pending pops will return nil once the queue is drained.
  def close
    @mutex.synchronize do
      @closed = true
      # Wake up all waiting threads, producers and consumers alike
      @not_empty.broadcast
      @not_full.broadcast
    end
  end

  # Clear all jobs from queue
  #
  # @return [Integer] number of jobs cleared
  def clear
    @mutex.synchronize do
      count = @queue.size
      @queue.clear
      @not_full.broadcast # Every slot is free again
      count
    end
  end

  # Get queue statistics
  #
  # @return [Hash] statistics hash
  def stats
    @mutex.synchronize do
      current = @queue.size
      {
        size: current,
        max_size: @max_size,
        closed: @closed,
        utilization: @max_size.zero? ? 0.0 : current.to_f / @max_size,
        priority_counts: @queue.group_by(&:priority).transform_values(&:count),
      }
    end
  end

  private

  # Insert +job+ at its sorted position. The job goes after every queued
  # job that compares equal to it, so equal jobs are popped in the order
  # they were pushed. Must be called with @mutex held.
  def insert_sorted(job)
    position = @queue.bsearch_index { |queued| (queued <=> job).positive? }
    @queue.insert(position || @queue.size, job)
  end

  # Current reading of the monotonic clock, in seconds.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
|
|
205
|
+
|
|
206
|
+
# Exception raised when trying to push to a closed queue
|
|
207
|
+
# Subclasses StandardError, so a bare `rescue` clause will catch it.
class ClosedQueueError < StandardError; end
|
|
208
|
+
end
|
|
209
|
+
end
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Parallel
|
|
5
|
+
# Job scheduler for load balancing and work distribution
|
|
6
|
+
#
|
|
7
|
+
# Manages job assignment to workers using different strategies:
|
|
8
|
+
# - Dynamic: Workers pull jobs as they become available (default)
|
|
9
|
+
# - Static: Pre-assign equal chunks to each worker
|
|
10
|
+
#
|
|
11
|
+
# @example Create scheduler with dynamic strategy
|
|
12
|
+
# scheduler = Omnizip::Parallel::JobScheduler.new(strategy: :dynamic)
|
|
13
|
+
# scheduler.schedule_jobs(jobs, worker_count: 4)
|
|
14
|
+
#
|
|
15
|
+
# @example Create scheduler with static strategy
|
|
16
|
+
# scheduler = Omnizip::Parallel::JobScheduler.new(strategy: :static)
|
|
17
|
+
# assignments = scheduler.schedule_jobs(jobs, worker_count: 4)
|
|
18
|
+
class JobScheduler
  # Strategies accepted by #initialize
  VALID_STRATEGIES = %i[dynamic static].freeze

  # @return [Symbol] scheduling strategy
  attr_reader :strategy

  # Initialize job scheduler
  #
  # @param strategy [Symbol] :dynamic or :static
  # @raise [ArgumentError] if strategy is invalid
  def initialize(strategy: :dynamic)
    @strategy = strategy
    validate_strategy!
  end

  # Schedule jobs for workers
  #
  # @param jobs [Array] array of jobs to schedule
  # @param worker_count [Integer] number of workers
  # @return [Hash] scheduling metadata (:dynamic) or a
  #   worker_id => [jobs] mapping with a :metadata entry (:static)
  def schedule_jobs(jobs, worker_count:)
    case @strategy
    when :dynamic
      schedule_dynamic(jobs, worker_count)
    when :static
      schedule_static(jobs, worker_count)
    end
  end

  # Estimate completion time based on job sizes and worker count
  #
  # @param jobs [Array] array of jobs with :size attribute
  # @param worker_count [Integer] number of workers
  # @param bytes_per_second [Float] processing rate
  # @return [Float] estimated seconds to completion (0.0 when there is
  #   nothing to process or there are no workers)
  def estimate_completion_time(jobs, worker_count:,
                               bytes_per_second: 10_000_000)
    total_bytes = jobs.sum { |job| job_size(job, 0) }
    return 0.0 if total_bytes.zero? || worker_count.zero?

    # Simple estimate: total bytes / (workers * rate)
    total_bytes.to_f / (worker_count * bytes_per_second)
  end

  # Calculate load balance quality metric
  #
  # @param assignments [Hash] worker_id => [jobs] mapping
  # @return [Float] balance score (0.0 = perfect, 1.0 = worst)
  def calculate_load_balance(assignments)
    return 0.0 if assignments.empty?

    # Total size per worker; jobs without a #size count as 1
    loads = assignments.values.map do |jobs|
      jobs.sum { |job| job_size(job, 1) }
    end

    lightest, heaviest = loads.minmax
    return 0.0 if heaviest.zero?

    # Balance = (max - min) / max
    (heaviest - lightest).to_f / heaviest
  end

  private

  # Validate scheduling strategy
  #
  # @raise [ArgumentError] if strategy is invalid
  def validate_strategy!
    return if VALID_STRATEGIES.include?(@strategy)

    raise ArgumentError,
          "Invalid strategy: #{@strategy}. Must be one of: #{VALID_STRATEGIES.join(', ')}"
  end

  # Size of a job, or +default+ when the job does not respond to #size.
  #
  # @param job [Object] job to measure
  # @param default [Integer] value used for jobs without #size
  # @return [Integer]
  def job_size(job, default)
    job.respond_to?(:size) ? job.size : default
  end

  # Dynamic scheduling: jobs pulled from queue as workers become available
  #
  # @param jobs [Array] jobs to schedule
  # @param worker_count [Integer] number of workers
  # @return [Hash] scheduling metadata
  def schedule_dynamic(jobs, worker_count)
    # With zero workers the per-worker estimate is reported as 0, since
    # dividing by zero would make the rounding step raise FloatDomainError.
    per_worker = if worker_count.zero?
                   0
                 else
                   (jobs.size.to_f / worker_count).ceil
                 end

    # In dynamic mode, we don't pre-assign jobs.
    # Workers pull from shared queue as they complete work,
    # so only metadata about the scheduling is returned.
    {
      strategy: :dynamic,
      total_jobs: jobs.size,
      worker_count: worker_count,
      estimated_jobs_per_worker: per_worker,
      queue: jobs, # Jobs will be consumed from this queue
    }
  end

  # Static scheduling: pre-assign jobs to workers in balanced chunks
  #
  # @param jobs [Array] jobs to schedule
  # @param worker_count [Integer] number of workers
  # @return [Hash] worker_id => [jobs] mapping plus a :metadata entry;
  #   an empty Hash when there are no jobs or no workers
  def schedule_static(jobs, worker_count)
    return {} if jobs.empty? || worker_count.zero?

    # Sort jobs by size (largest first) for better balance
    sorted_jobs = jobs.sort_by { |job| -job_size(job, 0) }

    # Initialize worker assignments
    assignments = (0...worker_count).to_h { |i| [i, []] }
    worker_loads = Array.new(worker_count, 0)

    # Assign each job to the worker with the smallest current load
    # (the lowest worker id wins ties)
    sorted_jobs.each do |job|
      min_worker = worker_loads.each_index.min_by { |i| worker_loads[i] }
      assignments[min_worker] << job
      worker_loads[min_worker] += job_size(job, 1)
    end

    # Score the balance before the :metadata key is added, so the score
    # only ever sees worker entries.
    balance_score = calculate_load_balance(assignments)

    assignments[:metadata] = {
      strategy: :static,
      total_jobs: jobs.size,
      worker_count: worker_count,
      balance_score: balance_score,
      worker_loads: worker_loads,
    }

    assignments
  end

  # Round-robin assignment (alternative simple strategy)
  #
  # NOTE(review): not reachable through #schedule_jobs in this class.
  #
  # @param jobs [Array] jobs to schedule
  # @param worker_count [Integer] number of workers
  # @return [Hash] worker_id => [jobs] mapping
  def schedule_round_robin(jobs, worker_count)
    return {} if jobs.empty? || worker_count.zero?

    assignments = (0...worker_count).to_h { |i| [i, []] }

    jobs.each_with_index do |job, index|
      assignments[index % worker_count] << job
    end

    assignments
  end

  # Size-aware assignment: largest jobs first, each one placed on the
  # currently lightest worker.
  #
  # NOTE(review): not reachable through #schedule_jobs in this class.
  #
  # @param jobs [Array] jobs to schedule
  # @param worker_count [Integer] number of workers
  # @return [Hash] worker_id => [jobs] mapping
  def schedule_bin_packing(jobs, worker_count)
    return {} if jobs.empty? || worker_count.zero?

    # Sort jobs by size (largest first)
    sorted_jobs = jobs.sort_by { |job| -job_size(job, 0) }

    bins = Array.new(worker_count) { { jobs: [], total_size: 0 } }

    sorted_jobs.each do |job|
      # Find bin with minimum total size
      min_bin = bins.min_by { |bin| bin[:total_size] }
      min_bin[:jobs] << job
      min_bin[:total_size] += job_size(job, 1)
    end

    # Convert to standard format
    bins.each_with_index.to_h { |bin, index| [index, bin[:jobs]] }
  end
end
|
|
202
|
+
end
|
|
203
|
+
end
|