omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "pattern_matcher"
|
|
4
|
+
require_relative "filter_chain"
|
|
5
|
+
require_relative "../models/match_result"
|
|
6
|
+
|
|
7
|
+
module Omnizip
|
|
8
|
+
module Extraction
|
|
9
|
+
# Coordinates selective extraction from archives
|
|
10
|
+
#
|
|
11
|
+
# Extracts only files matching specified patterns, efficiently
|
|
12
|
+
# skipping non-matched files without decompression.
|
|
13
|
+
class SelectiveExtractor
|
|
14
|
+
attr_reader :archive, :filter
|
|
15
|
+
|
|
16
|
+
# Initialize a new selective extractor
|
|
17
|
+
#
|
|
18
|
+
# @param archive [Object] Archive to extract from
|
|
19
|
+
# @param filter [FilterChain, PatternMatcher, Object] Filter to apply
|
|
20
|
+
def initialize(archive, filter = nil)
  # The filter is coerced by normalize_filter, so @filter ends up as the
  # given FilterChain/PatternMatcher, a new PatternMatcher, or nil.
  @filter = normalize_filter(filter)
  @archive = archive
end
|
|
24
|
+
|
|
25
|
+
# Extract matching files to destination
|
|
26
|
+
#
|
|
27
|
+
# @param dest [String] Destination directory
|
|
28
|
+
# @param options [Hash] Extraction options
|
|
29
|
+
# @option options [Boolean] :preserve_paths Keep directory structure
|
|
30
|
+
# @option options [Boolean] :flatten Extract all to root
|
|
31
|
+
# @option options [Boolean] :overwrite Overwrite existing files
|
|
32
|
+
# @option options [Progress::ProgressTracker] :progress Progress tracker
|
|
33
|
+
# @return [Array<String>] Paths of extracted files
|
|
34
|
+
def extract(dest, options = {})
  # Writes every entry returned by list_matches below +dest+ and returns
  # the list of destination paths, in extraction order.
  #
  # Raises Error when an entry's destination path would resolve outside
  # +dest+ (e.g. an entry named "../../etc/passwd"). Entry names come from
  # the archive and are therefore untrusted input.
  FileUtils.mkdir_p(dest)

  # Absolute form of the destination with a trailing separator, so that a
  # sibling such as "/tmp/out-evil" is not mistaken for being inside
  # "/tmp/out".
  root = File.expand_path(dest)
  root_prefix = root.end_with?(File::SEPARATOR) ? root : root + File::SEPARATOR

  extracted = []
  entries_to_extract = list_matches
  total = entries_to_extract.size
  current = 0

  entries_to_extract.each do |entry|
    dest_path = build_dest_path(entry, dest, options)

    # expand_path collapses "." and ".." segments; anything that no longer
    # starts with the destination prefix is a path-traversal attempt.
    unless File.expand_path(dest_path).start_with?(root_prefix)
      raise Error, "Entry path escapes destination directory: #{dest_path}"
    end

    extract_entry(entry, dest_path, options)
    extracted << dest_path

    # Report progress after each entry (tracker is optional and is passed
    # through unchanged to update_progress).
    current += 1
    update_progress(options[:progress], current, total, entry)
  end

  extracted
end
|
|
54
|
+
|
|
55
|
+
# Extract matching files to memory
|
|
56
|
+
#
|
|
57
|
+
# @return [Hash<String, String>] Hash of filename => content
|
|
58
|
+
def extract_to_memory
  # Builds a filename => content hash for every matching entry. A later
  # entry with the same filename replaces the earlier one.
  {}.tap do |contents|
    list_matches.each do |entry|
      contents[entry_filename(entry)] = read_entry_content(entry)
    end
  end
end
|
|
69
|
+
|
|
70
|
+
# List matching entries without extracting
|
|
71
|
+
#
|
|
72
|
+
# @return [Array] Matching entries
|
|
73
|
+
def list_matches
  # With no filter configured every entry counts as a match; otherwise
  # keep only the entries accepted by filter_matches?.
  candidates = list_all
  if @filter.nil?
    candidates
  else
    candidates.select { |entry| filter_matches?(entry) }
  end
end
|
|
80
|
+
|
|
81
|
+
# Count matching entries
|
|
82
|
+
#
|
|
83
|
+
# @return [Integer]
|
|
84
|
+
def count_matches
  # Number of entries that list_matches returns; nothing is extracted.
  matched = list_matches
  matched.size
end
|
|
87
|
+
|
|
88
|
+
# Get match result with statistics
|
|
89
|
+
#
|
|
90
|
+
# @return [Models::MatchResult]
|
|
91
|
+
def match_result
  # Scan the archive once, then report both the matching subset and the
  # total number of entries that were looked at.
  scanned = list_all
  selected = @filter ? scanned.select { |entry| filter_matches?(entry) } : scanned

  # The result is labelled with the filter's string form, or "all" when
  # no filter is set.
  label = @filter&.to_s || "all"

  Models::MatchResult.new(label, matches: selected, total_scanned: scanned.size)
end
|
|
105
|
+
|
|
106
|
+
private
|
|
107
|
+
|
|
108
|
+
# Normalize filter to FilterChain or PatternMatcher
|
|
109
|
+
#
|
|
110
|
+
# @param filter [Object] Input filter
|
|
111
|
+
# @return [FilterChain, PatternMatcher, nil]
|
|
112
|
+
def normalize_filter(filter)
  # Ready-made filters and nil pass through untouched; any other value is
  # handed to PatternMatcher.new.
  case filter
  when FilterChain, PatternMatcher, nil then filter
  else PatternMatcher.new(filter)
  end
end
|
|
122
|
+
|
|
123
|
+
# List all entries in archive
|
|
124
|
+
#
|
|
125
|
+
# @return [Array] All entries
|
|
126
|
+
def list_all
  # Returns every entry of @archive.
  #
  # Preference order:
  #   1. archive#entries when available
  #   2. archive#to_a for archives that expose #each and #to_a
  #   3. an enumerator over archive#each for archives that only define #each
  #
  # Raises Error when the archive offers neither #entries nor #each.
  return @archive.entries if @archive.respond_to?(:entries)

  unless @archive.respond_to?(:each)
    raise Error, "Archive does not support listing entries"
  end

  # Responding to #each does not imply #to_a (that comes from Enumerable),
  # so fall back to wrapping #each in an enumerator instead of failing
  # with NoMethodError.
  @archive.respond_to?(:to_a) ? @archive.to_a : @archive.enum_for(:each).to_a
end
|
|
135
|
+
|
|
136
|
+
# Extract a single entry
|
|
137
|
+
#
|
|
138
|
+
# @param entry [Object] Entry to extract
|
|
139
|
+
# @param dest_path [String] Destination path
|
|
140
|
+
# @param options [Hash] Options
|
|
141
|
+
def extract_entry(entry, dest_path, options)
  # Leave an existing file alone unless the caller opted into overwriting.
  already_present = File.exist?(dest_path)
  return if already_present && !options[:overwrite]

  # Make sure the directory that will hold the file exists.
  parent_dir = File.dirname(dest_path)
  FileUtils.mkdir_p(parent_dir)

  # Written in binary mode so the entry's bytes reach disk unchanged.
  File.binwrite(dest_path, read_entry_content(entry))
end
|
|
151
|
+
|
|
152
|
+
# Read content from an entry
|
|
153
|
+
#
|
|
154
|
+
# @param entry [Object] Entry to read
|
|
155
|
+
# @return [String] Entry content
|
|
156
|
+
def read_entry_content(entry)
  # Entries that can read themselves are the simplest case.
  return entry.read if entry.respond_to?(:read)

  if entry.respond_to?(:get_input_stream)
    stream = entry.get_input_stream
    begin
      return stream.read
    ensure
      # The stream was opened on our behalf; close it so extracting many
      # entries does not leave one open handle per entry.
      stream.close if stream.respond_to?(:close)
    end
  end

  # Last resort: ask the archive to read the entry for us.
  return @archive.read(entry) if @archive.respond_to?(:read)

  raise Error, "Cannot read entry content"
end
|
|
167
|
+
|
|
168
|
+
# Build destination path for an entry
|
|
169
|
+
#
|
|
170
|
+
# @param entry [Object] Entry
|
|
171
|
+
# @param dest [String] Destination directory
|
|
172
|
+
# @param options [Hash] Options
|
|
173
|
+
# @return [String] Full destination path
|
|
174
|
+
def build_dest_path(entry, dest, options)
  filename = entry_filename(entry)

  # :flatten, or :preserve_paths explicitly set to false, drops the entry's
  # directory part; otherwise the directory structure is kept (default).
  use_basename = options[:flatten] || options[:preserve_paths] == false
  relative = use_basename ? File.basename(filename) : filename
  path = File.join(dest, relative)

  # Entry names come from the archive and cannot be trusted: a name such as
  # "../../etc/passwd" (or a bare "..") would resolve outside +dest+.
  # Refuse any path that does not stay inside the destination directory.
  root = File.expand_path(dest)
  resolved = File.expand_path(path)
  unless resolved == root || resolved.start_with?(File.join(root, ""))
    raise Error, "Entry path escapes destination directory: #{filename}"
  end

  path
end
|
|
188
|
+
|
|
189
|
+
# Get filename from entry
|
|
190
|
+
#
|
|
191
|
+
# @param entry [Object] Entry
|
|
192
|
+
# @return [String] Filename
|
|
193
|
+
def entry_filename(entry)
  # Try the known name accessors in priority order: name, path, filename.
  accessor = %i[name path filename].find { |candidate| entry.respond_to?(candidate) }

  # Entries exposing none of them are represented by their string form.
  accessor ? entry.public_send(accessor) : entry.to_s
end
|
|
204
|
+
|
|
205
|
+
# Update progress tracker
|
|
206
|
+
#
|
|
207
|
+
# @param tracker [Progress::ProgressTracker, nil] Progress tracker
|
|
208
|
+
# @param current [Integer] Current count
|
|
209
|
+
# @param total [Integer] Total count
|
|
210
|
+
# @param entry [Object] Current entry
|
|
211
|
+
def update_progress(tracker, current, total, entry)
  # Progress reporting is optional; do nothing when no tracker was given.
  return nil if !tracker

  # +current+ and +total+ are forwarded under the tracker's
  # current_bytes / total_bytes keywords.
  report = {
    current_bytes: current,
    total_bytes: total,
    current_file: entry_filename(entry),
  }
  tracker.update(**report)
end
|
|
220
|
+
|
|
221
|
+
# Check if entry matches the filter
|
|
222
|
+
#
|
|
223
|
+
# @param entry [Object] Entry to check
|
|
224
|
+
# @return [Boolean]
|
|
225
|
+
def filter_matches?(entry)
  filename = entry_filename(entry)

  # Filter chains receive the entry itself plus its filename; every other
  # filter type (PatternMatcher included) is asked about the filename alone.
  return @filter.match?(entry, filename: filename) if FilterChain === @filter

  @filter.match?(filename)
end
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
end
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "extraction/pattern_matcher"
|
|
4
|
+
require_relative "extraction/filter_chain"
|
|
5
|
+
require_relative "extraction/selective_extractor"
|
|
6
|
+
require_relative "extraction/glob_pattern"
|
|
7
|
+
require_relative "extraction/regex_pattern"
|
|
8
|
+
require_relative "extraction/predicate_pattern"
|
|
9
|
+
|
|
10
|
+
module Omnizip
  # Selective extraction entry points for archives.
  #
  # Each helper builds a filter from glob patterns, regex patterns, custom
  # predicates or a FilterChain, and hands the work to a SelectiveExtractor
  # so that only matching entries are touched.
  module Extraction
    class << self
      # Extract every entry matching +pattern+ into +dest+.
      #
      # @param archive [Object] Archive to extract from
      # @param pattern [String, Regexp, Array] Pattern(s) to match
      # @param dest [String] Destination directory
      # @param options [Hash] Extraction options
      # @option options [Boolean] :preserve_paths Keep directory structure
      # @option options [Boolean] :flatten Extract all to destination root
      # @option options [Boolean] :overwrite Overwrite existing files
      # @return [Array<String>] Paths of extracted files
      def extract_matching(archive, pattern, dest, options = {})
        extractor_for(archive, pattern).extract(dest, options)
      end

      # Read every entry matching +pattern+ into memory.
      #
      # @param archive [Object] Archive to extract from
      # @param pattern [String, Regexp, Array] Pattern(s) to match
      # @return [Hash<String, String>] Hash of filename => content
      def extract_to_memory_matching(archive, pattern)
        extractor_for(archive, pattern).extract_to_memory
      end

      # List the entries matching +pattern+ without extracting them.
      #
      # @param archive [Object] Archive to list from
      # @param pattern [String, Regexp, Array] Pattern(s) to match
      # @return [Array] Matching entries
      def list_matching(archive, pattern)
        extractor_for(archive, pattern).list_matches
      end

      # Count the entries matching +pattern+.
      #
      # @param archive [Object] Archive to count in
      # @param pattern [String, Regexp, Array] Pattern(s) to match
      # @return [Integer] Number of matches
      def count_matching(archive, pattern)
        extractor_for(archive, pattern).count_matches
      end

      # Extract using a caller-supplied filter chain. The filter is handed
      # to the extractor as given, without going through #build_filter.
      #
      # @param archive [Object] Archive to extract from
      # @param filter [FilterChain] Filter chain to apply
      # @param dest [String] Destination directory
      # @param options [Hash] Extraction options
      # @return [Array<String>] Paths of extracted files
      def extract_with_filter(archive, filter, dest, options = {})
        SelectiveExtractor.new(archive, filter).extract(dest, options)
      end

      # Produce a match result (matches plus statistics) for +pattern+.
      #
      # @param archive [Object] Archive to analyze
      # @param pattern [String, Regexp, Array] Pattern(s) to match
      # @return [Models::MatchResult] Match result with statistics
      def match_result(archive, pattern)
        extractor_for(archive, pattern).match_result
      end

      private

      # Build a SelectiveExtractor for +archive+ restricted to +pattern+.
      #
      # @param archive [Object] Archive to wrap
      # @param pattern [Object] Pattern, array of patterns, or FilterChain
      # @return [SelectiveExtractor]
      def extractor_for(archive, pattern)
        SelectiveExtractor.new(archive, build_filter(pattern))
      end

      # Turn the caller's pattern argument into a filter object.
      #
      # @param pattern [Object] Pattern or array of patterns
      # @return [PatternMatcher, FilterChain]
      def build_filter(pattern)
        if Array === pattern
          # Several patterns: register each one as an include rule on a
          # single chain.
          pattern.each_with_object(FilterChain.new) do |single, chain|
            chain.include_pattern(single)
          end
        elsif FilterChain === pattern
          # Already a chain: use it unchanged.
          pattern
        else
          # A single pattern of any other kind.
          PatternMatcher.new(pattern)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
  module FileType
    # Centralized MIME type classification for file type detection
    #
    # Classifies MIME types into categories (text, archive, executable,
    # media) and maps them to a compression profile category.
    #
    # MIME types are matched case-insensitively and any parameters
    # (for example "; charset=utf-8") are ignored, so
    # "Application/JSON; charset=utf-8" is classified like
    # "application/json".
    class MimeClassifier
      # Text-based MIME types
      TEXT_TYPES = %w[
        text/plain
        text/html
        text/css
        text/javascript
        text/xml
        text/csv
        text/markdown
        application/json
        application/xml
        application/javascript
        application/ecmascript
        application/x-httpd-php
        application/x-sh
        application/x-csh
        application/x-perl
        application/x-python
        application/x-ruby
        application/x-sql
        application/sql
      ].freeze

      # Archive MIME types
      ARCHIVE_TYPES = %w[
        application/zip
        application/x-7z-compressed
        application/x-rar-compressed
        application/x-tar
        application/gzip
        application/x-gzip
        application/x-bzip2
        application/x-xz
        application/x-lzip
        application/x-lzma
        application/x-compress
        application/zstd
        application/x-archive
        application/x-iso9660-image
      ].freeze

      # Executable MIME types
      EXECUTABLE_TYPES = %w[
        application/x-executable
        application/x-mach-binary
        application/x-elf
        application/x-sharedlib
        application/x-msdownload
        application/x-dosexec
        application/vnd.microsoft.portable-executable
      ].freeze

      # Binary/unknown MIME types (treated as binary data)
      BINARY_TYPES = %w[
        application/octet-stream
      ].freeze

      # Media MIME types (images, audio, video); entries are either exact
      # strings or regular expressions.
      MEDIA_TYPES = [
        /\Aimage\//,
        /\Aaudio\//,
        /\Avideo\//,
        "application/pdf",
      ].freeze

      class << self
        # Check if the MIME type is text-based
        #
        # @param mime_type [String, nil] The MIME type to check
        # @return [Boolean] true if listed in TEXT_TYPES or under "text/"
        def text?(mime_type)
          type = normalize(mime_type)
          return false unless type

          TEXT_TYPES.include?(type) || type.start_with?("text/")
        end

        # Check if the MIME type is an archive
        #
        # @param mime_type [String, nil] The MIME type to check
        # @return [Boolean] true if listed in ARCHIVE_TYPES
        def archive?(mime_type)
          type = normalize(mime_type)
          return false unless type

          ARCHIVE_TYPES.include?(type)
        end

        # Check if the MIME type is executable
        #
        # Generic binary data (BINARY_TYPES) is reported as executable too.
        #
        # @param mime_type [String, nil] The MIME type to check
        # @return [Boolean] true if executable or generic binary
        def executable?(mime_type)
          type = normalize(mime_type)
          return false unless type

          EXECUTABLE_TYPES.include?(type) || BINARY_TYPES.include?(type)
        end

        # Check if the MIME type is media (image/audio/video, or PDF)
        #
        # @param mime_type [String, nil] The MIME type to check
        # @return [Boolean] true if media
        def media?(mime_type)
          type = normalize(mime_type)
          return false unless type

          # String#=== compares for equality and Regexp#=== tests a match,
          # so one call covers both kinds of MEDIA_TYPES entries.
          MEDIA_TYPES.any? { |pattern| pattern === type }
        end

        # Determine the recommended profile category for a MIME type
        #
        # Checks run in order text, executable/binary, archive/media; the
        # first hit wins and anything unrecognised is :balanced.
        #
        # @param mime_type [String, nil] The MIME type to classify
        # @return [Symbol] The recommended profile category
        #   (:text, :binary, :archive, :balanced)
        def profile_category(mime_type)
          return :balanced unless mime_type

          if text?(mime_type)
            :text
          elsif executable?(mime_type)
            :binary
          elsif archive?(mime_type) || media?(mime_type)
            # Archives and media share the :archive category.
            :archive
          else
            :balanced
          end
        end

        private

        # Reduce a MIME type to its bare, lower-case "type/subtype" form.
        #
        # @param mime_type [String, nil] Raw MIME type, possibly with
        #   parameters or mixed case
        # @return [String, nil] Normalized type, or nil when none was given
        def normalize(mime_type)
          return nil unless mime_type

          # Drop everything after the first ";" (parameters), then trim and
          # fold case.
          mime_type.to_s.split(";", 2).first.to_s.strip.downcase
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
require "stringio"

require "marcel"
require_relative "file_type/mime_classifier"
|
|
5
|
+
|
|
6
|
+
module Omnizip
  # File type detection module using Marcel for MIME type detection
  #
  # Provides MIME type detection using the Marcel library:
  # - Path-based detection (from a file on disk)
  # - Data-based detection (from a binary string)
  # - Stream-based detection (from an IO stream)
  #
  # Every detector returns nil instead of raising when detection fails.
  #
  # @example Detect file type from path
  #   mime_type = Omnizip::FileType.detect('document.pdf')
  #   # => 'application/pdf'
  #
  # @example Detect from binary data
  #   mime_type = Omnizip::FileType.detect_data(File.binread('image.png'))
  #   # => 'image/png'
  #
  # @example Detect from IO stream with filename hint
  #   File.open('document.pdf', 'rb') do |file|
  #     mime_type = Omnizip::FileType.detect_stream(file, filename: 'document.pdf')
  #     # => 'application/pdf'
  #   end
  module FileType
    class << self
      # Detect MIME type from file path
      #
      # The path is wrapped in a Pathname and handed to Marcel. Paths that
      # are nil or do not exist on disk yield nil.
      #
      # @param path [String, Pathname] File path
      # @return [String, nil] MIME type string or nil if detection fails
      #
      # @example
      #   FileType.detect('document.pdf')
      #   # => 'application/pdf'
      def detect(path)
        return nil unless path
        return nil unless File.exist?(path)

        Marcel::MimeType.for(Pathname.new(path))
      rescue StandardError
        nil
      end

      # Detect MIME type from binary data
      #
      # Wraps the data in an in-memory IO and hands it to Marcel.
      # Optionally accepts a filename hint for better accuracy.
      # The caller's string is never modified.
      #
      # @param data [String] Binary data
      # @param filename [String, nil] Optional filename hint
      # @return [String, nil] MIME type string or nil if detection fails
      #
      # @example Without filename hint
      #   data = File.binread('image.png')
      #   FileType.detect_data(data)
      #   # => 'image/png'
      #
      # @example With filename hint
      #   FileType.detect_data(data, filename: 'image.png')
      #   # => 'image/png'
      def detect_data(data, filename: nil)
        return nil unless data
        return nil if data.empty?

        # String#b returns a binary-encoded copy. Wrapping the copy avoids
        # StringIO#set_encoding, which can re-tag the encoding of the string
        # it wraps and would silently turn the caller's string into binary.
        io = StringIO.new(data.b)

        Marcel::MimeType.for(io, name: filename)
      rescue StandardError
        nil
      end

      # Detect MIME type from IO stream
      #
      # Hands the stream to Marcel for detection.
      # Optionally accepts a filename hint for better accuracy.
      # The stream position is restored afterwards, on success and on
      # failure alike, when the stream supports #pos and #seek.
      #
      # @param io [IO] IO stream
      # @param filename [String, nil] Optional filename hint
      # @return [String, nil] MIME type string or nil if detection fails
      #
      # @example
      #   File.open('document.pdf', 'rb') do |file|
      #     FileType.detect_stream(file, filename: 'document.pdf')
      #     # => 'application/pdf'
      #   end
      def detect_stream(io, filename: nil)
        return nil unless io

        # Remember where the caller was so detection does not disturb it.
        original_pos = io.pos if io.respond_to?(:pos)

        begin
          Marcel::MimeType.for(io, name: filename)
        ensure
          # Single restore point for both outcomes. If the restore itself
          # fails, the method-level rescue below turns that into nil rather
          # than letting the error escape.
          io.seek(original_pos) if original_pos && io.respond_to?(:seek)
        end
      rescue StandardError
        nil
      end
    end
  end
end
|