omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fractor"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
|
|
6
|
+
module Omnizip
|
|
7
|
+
module Parallel
|
|
8
|
+
# Parallel compression coordinator using Fractor
|
|
9
|
+
#
|
|
10
|
+
# Manages parallel compression of files in a directory.
|
|
11
|
+
# Distributes compression work across multiple workers and
|
|
12
|
+
# writes results to archive in a thread-safe manner.
|
|
13
|
+
#
|
|
14
|
+
# @example Compress directory in parallel
|
|
15
|
+
# compressor = Omnizip::Parallel::ParallelCompressor.new(threads: 4)
|
|
16
|
+
# compressor.compress('files/', 'backup.zip')
|
|
17
|
+
#
|
|
18
|
+
# @example With options
|
|
19
|
+
# options = Omnizip::Models::ParallelOptions.new
|
|
20
|
+
# options.threads = 8
|
|
21
|
+
# compressor = Omnizip::Parallel::ParallelCompressor.new(options)
|
|
22
|
+
# compressor.compress('files/', 'backup.zip', compression: :lzma2)
|
|
23
|
+
class ParallelCompressor
|
|
24
|
+
# Fractor Work class for compression jobs
#
# Packs the parameters of one compression job into the input hash handed
# to Fractor::Work and exposes each parameter through a reader method.
class CompressionWork < Fractor::Work
  # Keys stored in the work input; one reader is generated per key.
  INPUT_KEYS = %i[file_path archive_path compression level].freeze

  # @param file_path [String] path of the file to read
  # @param archive_path [String] archive path associated with the file
  # @param compression [Symbol] compression method (default :deflate)
  # @param level [Integer] compression level (default 6)
  def initialize(file_path:, archive_path:, compression: :deflate, level: 6)
    job = {
      file_path: file_path,
      archive_path: archive_path,
      compression: compression,
      level: level,
    }
    super(job)
  end

  # Readers: file_path, archive_path, compression, level.
  # Each one returns the value stored under the same key in the input hash.
  INPUT_KEYS.each do |key|
    define_method(key) { input[key] }
  end
end
|
|
52
|
+
|
|
53
|
+
# Fractor Worker class for compression
#
# Reads one file from disk, compresses its contents with the requested
# method and reports the outcome as a Fractor::WorkResult. Any
# StandardError raised along the way is captured in the result instead
# of propagating out of the worker.
class CompressionWorker < Fractor::Worker
  # Compression methods delegated to Omnizip::AlgorithmRegistry.
  REGISTRY_METHODS = %i[bzip2 lzma lzma2 zstandard].freeze

  # Process a single compression job.
  #
  # @param work [CompressionWork] job describing the file to compress
  # @return [Fractor::WorkResult] result carrying the compressed payload
  #   and file details on success, or the rescued error on failure
  def process(work)
    file_path = work.file_path
    archive_path = work.archive_path
    compression = work.compression
    level = work.level

    # Read file data
    data = ::File.binread(file_path)
    stat = ::File.stat(file_path)

    # Compress the data
    compressed_data = compress_data(data, compression, level)

    # CRC32 is calculated over the uncompressed bytes
    crc32 = Omnizip::Checksums::Crc32.new.tap do |c|
      c.update(data)
    end.finalize

    Fractor::WorkResult.new(
      result: {
        archive_path: archive_path,
        file_path: file_path,
        compressed_data: compressed_data,
        uncompressed_size: data.bytesize,
        compressed_size: compressed_data.bytesize,
        crc32: crc32,
        stat: stat,
        compression: compression,
      },
      work: work,
    )
  rescue StandardError => e
    Fractor::WorkResult.new(
      error: e,
      work: work,
    )
  end

  private

  # Compress a string with the given method.
  #
  # @param data [String] uncompressed bytes
  # @param method [Symbol] :store, :deflate, :bzip2, :lzma, :lzma2 or
  #   :zstandard
  # @param level [Integer] compression level passed to the compressor
  # @return [String] compressed bytes (the input itself for :store)
  # @raise [Omnizip::UnsupportedFormatError] for any other method
  def compress_data(data, method, level)
    case method
    when :store
      data
    when :deflate
      raw_deflate(data, level)
    when *REGISTRY_METHODS
      Omnizip::AlgorithmRegistry.get(method).compress(data, level: level)
    else
      raise Omnizip::UnsupportedFormatError,
            "Unsupported compression: #{method}"
    end
  end

  # Deflate data as a raw stream and release the zlib state right away.
  #
  # The negative window-bits value asks zlib for a raw deflate stream
  # (no zlib header or trailer). The stream is closed in an ensure so its
  # native buffers are not left waiting for garbage collection.
  #
  # @param data [String] uncompressed bytes
  # @param level [Integer] zlib compression level
  # @return [String] raw deflate output
  def raw_deflate(data, level)
    require "zlib" # loaded lazily, only when deflate is actually used
    deflater = Zlib::Deflate.new(level, -Zlib::MAX_WBITS)
    begin
      deflater.deflate(data, Zlib::FINISH)
    ensure
      deflater.close
    end
  end
end
|
|
118
|
+
|
|
119
|
+
# @return [Omnizip::Models::ParallelOptions] parallel options
|
|
120
|
+
attr_reader :options
|
|
121
|
+
|
|
122
|
+
# @return [Hash] compression statistics
|
|
123
|
+
attr_reader :stats
|
|
124
|
+
|
|
125
|
+
# Initialize parallel compressor
|
|
126
|
+
#
|
|
127
|
+
# @param options [Omnizip::Models::ParallelOptions, Hash] parallel options
|
|
128
|
+
# @param threads [Integer] number of threads (overrides options)
|
|
129
|
+
def initialize(options = nil, threads: nil)
  # Normalise the argument into a ParallelOptions instance owned by this
  # object: duplicate a given instance, otherwise start from defaults.
  if options.is_a?(Omnizip::Models::ParallelOptions)
    @options = options.dup
  else
    @options = Omnizip::Models::ParallelOptions.new
    if options.is_a?(Hash)
      options.each do |key, value|
        setter = :"#{key}="
        # Hash keys without a matching writer are ignored.
        @options.send(setter, value) if @options.respond_to?(setter)
      end
    end
  end

  # The explicit keyword wins over whatever the options carried.
  @options.threads = threads if threads
  @options.validate!

  @stats = {
    files_processed: 0,
    bytes_processed: 0,
    bytes_compressed: 0,
    start_time: nil,
    end_time: nil,
  }
end
|
|
154
|
+
|
|
155
|
+
# Compress directory to archive in parallel
|
|
156
|
+
#
|
|
157
|
+
# @param dir [String] directory path
|
|
158
|
+
# @param output [String] output archive path
|
|
159
|
+
# @param options [Hash] compression options
|
|
160
|
+
# @option options [Symbol] :compression compression method
|
|
161
|
+
# @option options [Integer] :level compression level
|
|
162
|
+
# @option options [Boolean] :recursive include subdirectories
|
|
163
|
+
# @option options [Proc] :progress progress callback
|
|
164
|
+
# @return [String] path to created archive
|
|
165
|
+
def compress(dir, output, **options)
  raise Errno::ENOENT, "Directory not found: #{dir}" unless ::File.exist?(dir)
  raise ArgumentError, "Not a directory: #{dir}" unless ::File.directory?(dir)

  compression = options[:compression] || :deflate
  level = options[:level] || 6
  recursive = options.fetch(:recursive, true)
  # NOTE(review): options[:progress] is accepted but no callback is invoked
  # anywhere in this method.

  @stats[:start_time] = Time.now

  # Scan directory for files
  files = scan_directory(dir, recursive: recursive)

  # Archive names are relative to +dir+. File.join(dir, "") yields the
  # directory with a trailing separator, so "src" and "src/" both strip.
  prefix = ::File.join(dir, "")

  # NOTE(review): every job is pushed before any is popped; if
  # JobQueue#push_with_size blocks once max_size is reached this would stall
  # on large directories - confirm against JobQueue.
  job_queue = JobQueue.new(max_size: @options.queue_size)

  # NOTE(review): the scheduler instance is never used below. The call is
  # kept only in case its constructor validates the strategy - confirm.
  JobScheduler.new(strategy: @options.strategy)

  files.each do |file_path|
    job_queue.push_with_size(
      file: file_path,
      size: ::File.size(file_path),
      data: {
        archive_path: file_path.delete_prefix(prefix),
        compression: compression,
        level: level,
      },
    )
  end

  # Create work items from jobs
  work_items = []
  until job_queue.empty?
    job = job_queue.pop(timeout: 0.1)
    break unless job

    work_items << CompressionWork.new(
      file_path: job.file,
      archive_path: job.data[:archive_path],
      compression: job.data[:compression],
      level: job.data[:level],
    )
  end

  pool = WorkerPool.new(
    worker_class: CompressionWorker,
    num_workers: @options.threads,
    continuous: false,
  )

  pool.start
  begin
    pool.submit_batch(work_items)
    pool.run

    results = pool.successful_results
    errors = pool.failed_results

    unless errors.empty?
      error_msgs = errors.map do |e|
        "#{e.work&.file_path}: #{e.error}"
      end.join("\n")
      raise Omnizip::CompressionError, "Compression errors:\n#{error_msgs}"
    end

    # The archive is written by the calling thread, one entry at a time.
    write_archive(output, results, compression: compression)
  ensure
    # Shut the pool down even when compression or writing raised.
    pool.shutdown
  end

  @stats[:end_time] = Time.now
  @stats[:files_processed] = results.size

  output
end
|
|
252
|
+
|
|
253
|
+
# Get compression statistics
|
|
254
|
+
#
|
|
255
|
+
# @return [Hash] statistics
|
|
256
|
+
def statistics
  started, finished = @stats.values_at(:start_time, :end_time)
  # Duration is only meaningful once both timestamps have been recorded.
  elapsed = started && finished ? finished - started : 0

  # Returns a new hash; the stored counters are left untouched.
  @stats.merge(
    duration: elapsed,
    compression_ratio: calculate_compression_ratio,
    throughput_mbps: calculate_throughput(elapsed),
  )
end
|
|
269
|
+
|
|
270
|
+
private
|
|
271
|
+
|
|
272
|
+
# Scan directory for files
|
|
273
|
+
#
|
|
274
|
+
# @param dir [String] directory path
|
|
275
|
+
# @param recursive [Boolean] scan recursively
|
|
276
|
+
# @return [Array<String>] file paths
|
|
277
|
+
def scan_directory(dir, recursive: true)
  pattern = recursive ? ::File.join("**", "*") : "*"

  # +dir+ is passed as the glob base instead of being spliced into the
  # pattern, so characters such as "[", "{" or "*" in the directory name are
  # taken literally. No FNM_DOTMATCH flag is passed, so dot-files are not
  # matched by the pattern.
  Dir.glob(pattern, base: dir)
     .map { |relative| ::File.join(dir, relative) }
     .select { |path| ::File.file?(path) }
     .sort
end
|
|
292
|
+
|
|
293
|
+
# Write archive from compressed results
|
|
294
|
+
#
|
|
295
|
+
# @param output [String] output path
|
|
296
|
+
# @param results [Array] compression results
|
|
297
|
+
# @param compression [Symbol] compression method
|
|
298
|
+
def write_archive(output, results, compression:)
  zip_writer = Omnizip::Formats::Zip::Writer.new(output)
  # Fields copied from the worker result onto each entry, in this order.
  precompressed_fields = %i[compressed_size uncompressed_size crc32 compressed_data]

  results.each do |work_result|
    payload = work_result.result
    next unless payload

    # Ask the writer for an entry with an empty body, then overwrite the
    # size/CRC/data fields with what the worker already computed.
    entry = zip_writer.send(:create_entry,
                            filename: payload[:archive_path],
                            uncompressed_data: "",
                            stat: payload[:stat])
    precompressed_fields.each { |field| entry[field] = payload[field] }

    # Entries are appended directly to the writer's internal list.
    zip_writer.instance_variable_get(:@entries) << entry

    @stats[:bytes_processed] += payload[:uncompressed_size]
    @stats[:bytes_compressed] += payload[:compressed_size]
  end

  zip_writer.send(:write_with_precompressed_data, compression)
end
|
|
326
|
+
|
|
327
|
+
# Calculate compression ratio
|
|
328
|
+
#
|
|
329
|
+
# @return [Float] compression ratio percentage
|
|
330
|
+
def calculate_compression_ratio
  original_bytes = @stats[:bytes_processed]

  if original_bytes.zero?
    # Nothing processed yet: report 0.0 rather than dividing by zero.
    0.0
  else
    packed_fraction = @stats[:bytes_compressed].to_f / original_bytes
    (1.0 - packed_fraction) * 100.0
  end
end
|
|
335
|
+
|
|
336
|
+
# Calculate throughput in MB/s
|
|
337
|
+
#
|
|
338
|
+
# @param duration [Float] duration in seconds
|
|
339
|
+
# @return [Float] throughput in MB/s
|
|
340
|
+
def calculate_throughput(duration)
  if duration.zero?
    # No elapsed time recorded: report 0.0 rather than dividing by zero.
    0.0
  else
    mebibytes = @stats[:bytes_processed].to_f / (1024 * 1024)
    mebibytes / duration
  end
end
|
|
345
|
+
end
|
|
346
|
+
end
|
|
347
|
+
end
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fractor"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
|
|
6
|
+
module Omnizip
|
|
7
|
+
module Parallel
|
|
8
|
+
# Parallel extraction coordinator using Fractor
|
|
9
|
+
#
|
|
10
|
+
# Manages parallel extraction of files from an archive.
|
|
11
|
+
# Distributes extraction work across multiple workers and
|
|
12
|
+
# writes files to disk in a thread-safe manner.
|
|
13
|
+
#
|
|
14
|
+
# @example Extract archive in parallel
|
|
15
|
+
# extractor = Omnizip::Parallel::ParallelExtractor.new(threads: 4)
|
|
16
|
+
# extractor.extract('backup.zip', 'output/')
|
|
17
|
+
#
|
|
18
|
+
# @example With options
|
|
19
|
+
# options = Omnizip::Models::ParallelOptions.new
|
|
20
|
+
# options.threads = 8
|
|
21
|
+
# extractor = Omnizip::Parallel::ParallelExtractor.new(options)
|
|
22
|
+
# extractor.extract('backup.zip', 'output/')
|
|
23
|
+
class ParallelExtractor
|
|
24
|
+
# Fractor Work class for extraction jobs
|
|
25
|
+
class ExtractionWork < Fractor::Work
  # Bundle the three job fields into the single input hash given to the
  # Fractor::Work constructor.
  #
  # @param entry [Object] archive entry to extract
  # @param archive_path [String] path of the archive containing the entry
  # @param dest_dir [String] directory the entry is extracted into
  def initialize(entry:, archive_path:, dest_dir:)
    payload = { entry: entry, archive_path: archive_path, dest_dir: dest_dir }
    super(payload)
  end

  # @return [Object] the archive entry stored in the work input
  def entry
    input[:entry]
  end

  # @return [String] the archive path stored in the work input
  def archive_path
    input[:archive_path]
  end

  # @return [String] the destination directory stored in the work input
  def dest_dir
    input[:dest_dir]
  end
end
|
|
46
|
+
|
|
47
|
+
# Fractor Worker class for extraction
|
|
48
|
+
class ExtractionWorker < Fractor::Worker
|
|
49
|
+
# Read one entry into memory and describe where it should be written.
#
# @param work [ExtractionWork] work item carrying entry, archive_path and dest_dir
# @return [Fractor::WorkResult] result hash on success, or the rescued error
def process(work)
  entry = work.entry
  dest_dir = work.dest_dir
  dest_path = ::File.join(dest_dir, entry.name)

  # Archive entry names are untrusted input: refuse any name that resolves
  # outside the destination directory (e.g. "../../etc/passwd") before the
  # archive is even read.
  root = ::File.expand_path(dest_dir)
  resolved = ::File.expand_path(dest_path)
  unless resolved == root || resolved.start_with?(::File.join(root, ""))
    raise "Entry escapes destination directory: #{entry.name}"
  end

  # Read and decompress entry data
  data = read_entry_data(work.archive_path, entry)

  Fractor::WorkResult.new(
    result: {
      entry_name: entry.name,
      dest_path: dest_path,
      data: data,
      directory: entry.directory?,
      unix_perms: entry.respond_to?(:unix_perms) ? entry.unix_perms : 0,
    },
    work: work,
  )
rescue StandardError => e
  # Failures are returned as an error result rather than raised.
  Fractor::WorkResult.new(
    error: e,
    work: work,
  )
end
|
|
77
|
+
|
|
78
|
+
private
|
|
79
|
+
|
|
80
|
+
# Read one entry's bytes from the archive file and decompress them.
#
# @param archive_path [String] path of the ZIP archive on disk
# @param entry [#name, #directory?] entry to read
# @return [String] decompressed contents ("" for directory entries)
# @raise [RuntimeError] if the entry is missing or its local record is
#   truncated or has a wrong signature
def read_entry_data(archive_path, entry)
  return "" if entry.directory?

  # NOTE(review): the archive index is re-read for every entry.
  reader = Omnizip::Formats::Zip::Reader.new(archive_path)
  reader.read

  reader_entry = reader.entries.find { |e| e.filename == entry.name }
  raise "Entry not found in archive: #{entry.name}" unless reader_entry

  ::File.open(archive_path, "rb") do |io|
    io.seek(reader_entry.local_header_offset, ::IO::SEEK_SET)

    # The fixed part of a local file header is 30 bytes.
    fixed_header = io.read(30)
    unless fixed_header && fixed_header.bytesize == 30
      raise "Truncated local file header for entry: #{entry.name}"
    end

    signature, *_unused, filename_length, extra_length =
      fixed_header.unpack("VvvvvvVVVvv")
    unless signature == 0x04034b50
      raise "Invalid local file header signature for entry: #{entry.name}"
    end

    # Skip the variable-length filename and extra field.
    io.seek(filename_length + extra_length, ::IO::SEEK_CUR)

    # Sizes come from the reader's entry rather than the local header.
    expected = reader_entry.compressed_size
    compressed_data = io.read(expected)
    unless compressed_data && compressed_data.bytesize == expected
      # A short read would otherwise yield a silently corrupt file.
      raise "Truncated data for entry: #{entry.name}"
    end

    reader.send(:decompress_data,
                compressed_data,
                reader_entry.compression_method,
                reader_entry.uncompressed_size)
  end
end
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# @return [Omnizip::Models::ParallelOptions] parallel options
|
|
119
|
+
attr_reader :options
|
|
120
|
+
|
|
121
|
+
# @return [Hash] extraction statistics
|
|
122
|
+
attr_reader :stats
|
|
123
|
+
|
|
124
|
+
# Initialize parallel extractor
|
|
125
|
+
#
|
|
126
|
+
# @param options [Omnizip::Models::ParallelOptions, Hash] parallel options
|
|
127
|
+
# @param threads [Integer] number of threads (overrides options)
|
|
128
|
+
def initialize(options = nil, threads: nil)
  # Normalise the argument into a ParallelOptions instance owned by this
  # object: duplicate a given instance, otherwise start from defaults.
  if options.is_a?(Omnizip::Models::ParallelOptions)
    @options = options.dup
  else
    @options = Omnizip::Models::ParallelOptions.new
    if options.is_a?(Hash)
      options.each do |key, value|
        setter = :"#{key}="
        # Hash keys without a matching writer are ignored.
        @options.send(setter, value) if @options.respond_to?(setter)
      end
    end
  end

  # The explicit keyword wins over whatever the options carried.
  @options.threads = threads if threads
  @options.validate!

  @stats = {
    files_extracted: 0,
    bytes_extracted: 0,
    start_time: nil,
    end_time: nil,
  }

  # Guards the disk-writing section of write_extracted_files.
  @write_mutex = Mutex.new
end
|
|
154
|
+
|
|
155
|
+
# Extract archive to directory in parallel
|
|
156
|
+
#
|
|
157
|
+
# @param archive [String] archive path
|
|
158
|
+
# @param dest [String] destination directory
|
|
159
|
+
# @param options [Hash] extraction options
|
|
160
|
+
# @option options [Boolean] :overwrite overwrite existing files
|
|
161
|
+
# @option options [Proc] :progress progress callback
|
|
162
|
+
# @return [Array<String>] extracted file paths
|
|
163
|
+
def extract(archive, dest, **options)
  raise Errno::ENOENT, "Archive not found: #{archive}" unless ::File.exist?(archive)

  overwrite = options.fetch(:overwrite, false)
  # NOTE(review): options[:progress] is accepted but no callback is invoked
  # anywhere in this method.

  @stats[:start_time] = Time.now

  # Read archive to get entries
  entries = read_archive_entries(archive)

  # Create destination directory
  FileUtils.mkdir_p(dest)

  # NOTE(review): every job is pushed before any is popped; if
  # JobQueue#push_with_size blocks once max_size is reached this would stall
  # on large archives - confirm against JobQueue.
  job_queue = JobQueue.new(max_size: @options.queue_size)

  entries.each do |entry|
    file_size = entry.respond_to?(:size) ? entry.size : 0

    job_queue.push_with_size(
      file: entry.name,
      size: file_size,
      data: { entry: entry },
    )
  end

  # Create work items from jobs
  work_items = []
  until job_queue.empty?
    job = job_queue.pop(timeout: 0.1)
    break unless job

    work_items << ExtractionWork.new(
      entry: job.data[:entry],
      archive_path: archive,
      dest_dir: dest,
    )
  end

  pool = WorkerPool.new(
    worker_class: ExtractionWorker,
    num_workers: @options.threads,
    continuous: false,
  )

  pool.start
  begin
    pool.submit_batch(work_items)
    pool.run

    results = pool.successful_results
    errors = pool.failed_results

    unless errors.empty?
      error_msgs = errors.map do |e|
        "#{e.work&.entry&.name}: #{e.error}"
      end.join("\n")
      raise Omnizip::ExtractionError, "Extraction errors:\n#{error_msgs}"
    end

    # Files are written by the calling thread once all workers are done.
    extracted_paths = write_extracted_files(results, overwrite: overwrite)
  ensure
    # Shut the pool down even when extraction or writing raised.
    pool.shutdown
  end

  @stats[:end_time] = Time.now
  @stats[:files_extracted] = results.size

  extracted_paths
end
|
|
240
|
+
|
|
241
|
+
# Get extraction statistics
|
|
242
|
+
#
|
|
243
|
+
# @return [Hash] statistics
|
|
244
|
+
def statistics
  started, finished = @stats.values_at(:start_time, :end_time)
  # Duration is only meaningful once both timestamps have been recorded.
  elapsed = started && finished ? finished - started : 0

  # Returns a new hash; the stored counters are left untouched.
  @stats.merge(
    duration: elapsed,
    throughput_mbps: calculate_throughput(elapsed),
  )
end
|
|
256
|
+
|
|
257
|
+
private
|
|
258
|
+
|
|
259
|
+
# Read archive entries
|
|
260
|
+
#
|
|
261
|
+
# @param archive_path [String] archive path
|
|
262
|
+
# @return [Array<Entry>] array of entries
|
|
263
|
+
def read_archive_entries(archive_path)
  # Collect every entry yielded while the archive is open and return the list.
  [].tap do |collected|
    Omnizip::Zip::File.open(archive_path) do |zip|
      zip.each { |entry| collected.push(entry) }
    end
  end
end
|
|
274
|
+
|
|
275
|
+
# Write extracted files to disk
|
|
276
|
+
#
|
|
277
|
+
# @param results [Array] extraction results
|
|
278
|
+
# @param overwrite [Boolean] overwrite existing files
|
|
279
|
+
# @return [Array<String>] extracted file paths
|
|
280
|
+
def write_extracted_files(results, overwrite: false)
  extracted_paths = []

  results.each do |work_result|
    result = work_result.result
    next unless result

    dest_path = result[:dest_path]

    @write_mutex.synchronize do
      if result[:directory]
        # Directory entries are exempt from the "File exists" check: the
        # directory may already have been created by mkdir_p while writing a
        # file inside it, and mkdir_p on an existing directory is a no-op.
        FileUtils.mkdir_p(dest_path)
      else
        raise "File exists: #{dest_path}" if !overwrite && ::File.exist?(dest_path)

        FileUtils.mkdir_p(::File.dirname(dest_path))
        ::File.binwrite(dest_path, result[:data])

        # to_i treats a missing (nil) permission value as "no permissions
        # recorded"; only the rwx bits are applied.
        perms = result[:unix_perms].to_i
        ::File.chmod(perms & 0o777, dest_path) if perms.positive?

        @stats[:bytes_extracted] += result[:data].bytesize
      end

      extracted_paths << dest_path
    end
  end

  extracted_paths
end
|
|
317
|
+
|
|
318
|
+
# Calculate throughput in MB/s
|
|
319
|
+
#
|
|
320
|
+
# @param duration [Float] duration in seconds
|
|
321
|
+
# @return [Float] throughput in MB/s
|
|
322
|
+
def calculate_throughput(duration)
  if duration.zero?
    # No elapsed time recorded: report 0.0 rather than dividing by zero.
    0.0
  else
    mebibytes = @stats[:bytes_extracted].to_f / (1024 * 1024)
    mebibytes / duration
  end
end
|
|
327
|
+
end
|
|
328
|
+
end
|
|
329
|
+
end
|