omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
5
|
+
#
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
7
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
8
|
+
# to deal in the Software without restriction, including without limitation
|
|
9
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
10
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
11
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
12
|
+
#
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
|
14
|
+
# all copies or substantial portions of the Software.
|
|
15
|
+
#
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
21
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
22
|
+
# DEALINGS IN THE SOFTWARE.
|
|
23
|
+
|
|
24
|
+
module Omnizip
  # Abstract base class for all preprocessing filters.
  #
  # Filters are reversible transformations applied to data before
  # compression to improve compression ratios. This class only defines the
  # interface; every operation below raises NotImplementedError until a
  # subclass overrides it.
  #
  # Different container formats (7z, XZ) use different numeric IDs for the
  # same filter, so each filter resolves its own ID per format through
  # #id_for_format.
  #
  # @abstract Subclasses must implement #id_for_format, #encode, #decode
  #   and the class-level .metadata.
  #
  # @example Create a custom filter
  #   class MyFilter < Filter
  #     def initialize(architecture:)
  #       super(architecture: architecture, name: "MyFilter")
  #     end
  #
  #     def id_for_format(format)
  #       format == :xz ? 0x04 : 0x03
  #     end
  #
  #     def encode(data, position = 0)
  #       # encoding logic
  #     end
  #
  #     def decode(data, position = 0)
  #       # decoding logic
  #     end
  #
  #     def self.metadata
  #       { name: "MyFilter", description: "..." }
  #     end
  #   end
  class Filter
    # @return [Symbol] Architecture identifier
    #   (:x86, :arm, :arm64, :powerpc, :ia64, :sparc)
    attr_reader :architecture

    # @return [String] Human-readable filter name
    attr_reader :name

    # Store the target architecture and display name.
    #
    # @param architecture [Symbol] Target architecture
    # @param name [String] Human-readable name (defaults to "Unknown")
    def initialize(architecture:, name: "Unknown")
      @architecture = architecture
      @name = name
    end

    # Resolve the filter ID used by a specific container format.
    #
    # The same filter is identified by different IDs in different formats
    # (7z vs. XZ); subclasses map the format symbol to the right ID here.
    #
    # @param format [Symbol] Format identifier (:seven_zip, :xz)
    # @return [Integer] Format-specific filter ID
    # @raise [NotImplementedError] Always, unless overridden by a subclass
    #
    # @example Get XZ format ID for BCJ filter
    #   bcj.id_for_format(:xz) # => 0x04
    #
    # @example Get 7z format ID for BCJ filter
    #   bcj.id_for_format(:seven_zip) # => 0x03030103
    def id_for_format(format)
      raise NotImplementedError,
            "#{self.class} must implement #id_for_format(format)"
    end

    # Encode (preprocess) data for compression.
    #
    # Implementations must be reversible: decode(encode(data)) == data.
    #
    # @param data [String] Binary data to encode
    # @param position [Integer] Current stream position (default: 0)
    # @return [String] Encoded binary data
    # @raise [NotImplementedError] Always, unless overridden by a subclass
    def encode(data, position = 0)
      raise NotImplementedError,
            "#{self.class} must implement #encode(data, position)"
    end

    # Decode (postprocess) data after decompression.
    #
    # Implementations reverse the #encode transformation.
    #
    # @param data [String] Binary data to decode
    # @param position [Integer] Current stream position (default: 0)
    # @return [String] Decoded binary data
    # @raise [NotImplementedError] Always, unless overridden by a subclass
    def decode(data, position = 0)
      raise NotImplementedError,
            "#{self.class} must implement #decode(data, position)"
    end

    class << self
      # Describe this filter class.
      #
      # @return [Hash] Filter metadata. Implementations are expected to
      #   provide :name (String), :description (String) and
      #   :supported_archs (Array<Symbol>).
      # @raise [NotImplementedError] Always, unless overridden by a subclass
      #
      # @example Get BCJ filter metadata
      #   Omnizip::Filters::BCJ.metadata
      #   # => { name: "BCJ", description: "...", supported_archs: [:x86, :arm, ...] }
      def metadata
        # Inside `class << self`, `self` is the class, so the message names
        # the concrete subclass that failed to override .metadata.
        raise NotImplementedError,
              "#{self} must implement .metadata"
      end
    end
  end
end
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2024 Ribose Inc.
|
|
5
|
+
#
|
|
6
|
+
# This file is part of Omnizip.
|
|
7
|
+
#
|
|
8
|
+
# Omnizip is a pure Ruby port of 7-Zip compression algorithms.
|
|
9
|
+
# Based on the 7-Zip LZMA SDK by Igor Pavlov.
|
|
10
|
+
#
|
|
11
|
+
# This library is free software; you can redistribute it and/or
|
|
12
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
13
|
+
# License as published by the Free Software Foundation; either
|
|
14
|
+
# version 2.1 of the License, or (at your option) any later version.
|
|
15
|
+
#
|
|
16
|
+
# See the COPYING file for the complete text of the license.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
module Omnizip
|
|
20
|
+
# Pipeline for chaining multiple filters together.
|
|
21
|
+
#
|
|
22
|
+
# Filters are applied in sequence during encoding, and in reverse
|
|
23
|
+
# order during decoding. Position tracking is maintained across
|
|
24
|
+
# the entire pipeline.
|
|
25
|
+
class FilterPipeline
  # @return [Array] Filters in the order they were added
  attr_reader :filters

  # Create a pipeline that contains no filters yet.
  def initialize
    @position = 0
    @filters = []
  end

  # Append a filter to the end of the pipeline.
  #
  # Encoding runs the filters in insertion order; decoding runs them in
  # the opposite order.
  #
  # @param filter [#encode, #decode] Object responding to
  #   +encode(data, position)+ and +decode(data, position)+
  # @return [self] The pipeline itself, so calls can be chained
  def add_filter(filter)
    @filters.push(filter)
    self
  end

  # Tell whether the pipeline has no filters.
  #
  # @return [Boolean] True when no filter has been added
  def empty?
    @filters.empty?
  end

  # Count the filters in the pipeline.
  #
  # @return [Integer] Number of filters
  def size
    @filters.size
  end

  # Encode (preprocess) data by running every filter in insertion order.
  #
  # Each filter receives the output of the previous one and the same
  # +position+ value. With no filters, a copy of +data+ is returned.
  #
  # @param data [String] Binary data to encode
  # @param position [Integer] Current stream position
  # @return [String] Encoded binary data
  def encode(data, position = 0)
    return data.dup if @filters.empty?

    @filters.reduce(data) do |buffer, stage|
      stage.encode(buffer, position)
    end
  end

  # Decode (postprocess) data by running every filter in reverse order.
  #
  # Each filter receives the output of the previous one and the same
  # +position+ value. With no filters, a copy of +data+ is returned.
  #
  # @param data [String] Binary data to decode
  # @param position [Integer] Current stream position
  # @return [String] Decoded binary data
  def decode(data, position = 0)
    return data.dup if @filters.empty?

    @filters.reverse_each.reduce(data) do |buffer, stage|
      stage.decode(buffer, position)
    end
  end

  # Remove every filter and reset the stored position to zero.
  #
  # @return [void]
  def clear
    @filters.clear
    @position = 0
  end
end
|
|
108
|
+
end
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2024 Ribose Inc.
|
|
5
|
+
#
|
|
6
|
+
# This file is part of Omnizip.
|
|
7
|
+
#
|
|
8
|
+
# Omnizip is a pure Ruby port of 7-Zip compression algorithms.
|
|
9
|
+
# Based on the 7-Zip LZMA SDK by Igor Pavlov.
|
|
10
|
+
#
|
|
11
|
+
# This library is free software; you can redistribute it and/or
|
|
12
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
13
|
+
# License as published by the Free Software Foundation; either
|
|
14
|
+
# version 2.1 of the License, or (at your option) any later version.
|
|
15
|
+
#
|
|
16
|
+
# See the COPYING file for the complete text of the license.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
module Omnizip
|
|
20
|
+
# Registry for managing filter classes.
|
|
21
|
+
#
|
|
22
|
+
# This class provides a centralized registry for preprocessing filters,
|
|
23
|
+
# allowing filters to self-register and be retrieved by name.
|
|
24
|
+
# It implements a plugin-style architecture for extensibility.
|
|
25
|
+
class FilterRegistry
  # Registry storage: name (Symbol) => entry. An entry is either the
  # filter Class itself (old-style, see .register) or a Hash with
  # +:class+ and +:formats+ keys (new-style, see .register_with_formats).
  @filters = {}

  class << self
    # Register a filter class with the registry (old-style entry).
    #
    # Filters registered this way are treated as supporting every format.
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @param klass [Class] The filter class to register
    # @raise [ArgumentError] If name or klass is nil
    # @return [void]
    def register(name, klass)
      raise ArgumentError, "Filter name cannot be nil" if name.nil?
      raise ArgumentError, "Filter class cannot be nil" if klass.nil?

      @filters[name.to_sym] = klass
    end

    # Retrieve a filter class by name.
    #
    # Works for both old-style (Class) and new-style (Hash with :class
    # key) registrations.
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @raise [UnknownFilterError] If filter is not registered
    # @return [Class] The registered filter class
    def get(name)
      entry = lookup(name)
      unless entry
        raise UnknownFilterError,
              "Unknown filter: #{name}. " \
              "Available: #{available.join(', ')}"
      end

      entry_class(entry)
    end

    # Check if a filter is registered.
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @return [Boolean] True if filter is registered, false otherwise
    def registered?(name)
      !lookup(name).nil?
    end

    # Get list of all registered filter names.
    #
    # @return [Array<Symbol>] Array of registered filter names
    def available
      @filters.keys
    end

    # Reset the registry (primarily for testing).
    #
    # @return [void]
    def reset!
      @filters.clear
    end

    # Register a filter class together with the formats it supports
    # (new-style entry).
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @param filter_class [Class] The filter class to register
    # @param formats [Array<Symbol>] Supported formats (default: [:xz,
    #   :seven_zip])
    # @raise [ArgumentError] If name or filter_class is nil
    # @return [void]
    def register_with_formats(name, filter_class, formats: %i[xz seven_zip])
      raise ArgumentError, "Filter name cannot be nil" if name.nil?
      raise ArgumentError, "Filter class cannot be nil" if filter_class.nil?

      @filters[name.to_sym] = {
        class: filter_class,
        formats: formats,
      }
    end

    # Get a new filter instance for a specific format.
    #
    # The filter must be registered and must support +format+. Old-style
    # registrations (made with .register) are accepted and treated as
    # supporting every format, consistent with .supports_format?.
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @param format [Symbol] Format identifier (:xz, :seven_zip)
    # @param options [Hash] Optional keyword arguments forwarded to the
    #   filter class constructor (e.g. +architecture: :x86+)
    # @raise [KeyError] If filter is not registered
    # @raise [ArgumentError] If filter doesn't support the format
    # @return [Object] New filter instance
    def get_for_format(name, format, **options)
      entry = lookup(name)
      raise KeyError, "Filter not found: #{name}" unless entry

      unless entry_supports?(entry, format)
        raise ArgumentError,
              "Filter #{name} not supported for format #{format}"
      end

      filter_class = entry_class(entry)
      # Only splat when there is something to pass, so constructors that
      # take no arguments keep working on every Ruby version.
      options.empty? ? filter_class.new : filter_class.new(**options)
    end

    # Check if filter supports specific format.
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @param format [Symbol] Format identifier
    # @return [Boolean] True if filter is registered and supports the
    #   format, false otherwise
    def supports_format?(name, format)
      entry = lookup(name)
      return false unless entry

      entry_supports?(entry, format)
    end

    # Get all filters supporting a specific format.
    #
    # @param format [Symbol] Format identifier
    # @return [Array<Symbol>] Filter names supporting the format
    def filters_for_format(format)
      @filters.select { |_, entry| entry_supports?(entry, format) }.keys
    end

    private

    # Fetch the raw registry entry for a name.
    #
    # @param name [Symbol, String, nil] Filter name
    # @return [Class, Hash, nil] Entry, or nil when the name is unknown
    #   or cannot be converted to a Symbol
    def lookup(name)
      @filters[name.to_sym] if name.respond_to?(:to_sym)
    end

    # Extract the filter class from an old- or new-style entry.
    #
    # @param entry [Class, Hash] Registry entry
    # @return [Class] The filter class
    def entry_class(entry)
      entry.is_a?(Hash) ? entry[:class] : entry
    end

    # Decide whether an entry supports a format.
    #
    # @param entry [Class, Hash] Registry entry
    # @param format [Symbol] Format identifier
    # @return [Boolean] Old-style entries support every format; new-style
    #   entries support only the formats they were registered with
    def entry_supports?(entry, format)
      return true unless entry.is_a?(Hash)

      Array(entry[:formats]).include?(format)
    end
  end
end
|
|
163
|
+
|
|
164
|
+
# Error raised when an unknown filter is requested
|
|
165
|
+
class UnknownFilterError < StandardError; end
|
|
166
|
+
end
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
5
|
+
#
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
7
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
8
|
+
# to deal in the Software without restriction, including without limitation
|
|
9
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
10
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
11
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
12
|
+
#
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
|
14
|
+
# all copies or substantial portions of the Software.
|
|
15
|
+
#
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
21
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
22
|
+
# DEALINGS IN THE SOFTWARE.
|
|
23
|
+
|
|
24
|
+
require_relative "../filter"
|
|
25
|
+
|
|
26
|
+
module Omnizip
|
|
27
|
+
module Filters
|
|
28
|
+
# Unified BCJ (Branch/Call/Jump) filter for multiple architectures
|
|
29
|
+
#
|
|
30
|
+
# This filter preprocesses executable code by converting relative
|
|
31
|
+
# addresses in branch/call instructions to absolute addresses.
|
|
32
|
+
# The transformation is reversible and improves compression ratio.
|
|
33
|
+
#
|
|
34
|
+
# Supports x86, ARM, ARM Thumb, ARM64, PowerPC, IA64, SPARC architectures.
|
|
35
|
+
# Automatically returns correct filter ID for 7z or XZ format.
|
|
36
|
+
#
|
|
37
|
+
# @example Create x86 BCJ filter
|
|
38
|
+
# bcj = Omnizip::Filters::BCJ.new(architecture: :x86)
|
|
39
|
+
# bcj.id_for_format(:xz) # => 0x04
|
|
40
|
+
# bcj.id_for_format(:seven_zip) # => 0x03030103
|
|
41
|
+
class BCJ < Filter
  # Architecture-specific configurations.
  #
  # Each entry holds:
  # * +:opcodes+          - byte values that mark a candidate instruction
  # * +:address_size+     - width in bytes of the little-endian address
  #   field that directly follows the opcode byte
  # * +:instruction_size+ - value added to the instruction offset when
  #   converting between relative and absolute addresses
  # * +:xz_id+ / +:seven_zip_id+ - filter ID per container format
  #   (+nil+ means the format has no ID for that architecture)
  #
  # NOTE(review): every architecture is processed with the same
  # "opcode byte + little-endian address" scheme. Whether that matches
  # the reference 7-Zip/XZ converters bit-for-bit for the non-x86
  # targets has not been verified here - confirm before relying on
  # interoperability with other tools.
  CONFIG = {
    x86: {
      opcodes: [0xE8, 0xE9], # CALL, JMP
      address_size: 4,
      instruction_size: 5,
      xz_id: 0x04,
      seven_zip_id: 0x03030103,
    },
    arm: {
      opcodes: [0x0A, 0x0B], # ARM BL/B conditional
      address_size: 4,
      instruction_size: 4,
      xz_id: 0x07,
      seven_zip_id: 0x03030501,
    },
    armthumb: {
      opcodes: [0xE8, 0xF0, 0xF1], # ARM Thumb BL/B conditional
      address_size: 4,
      instruction_size: 4,
      xz_id: 0x08,
      seven_zip_id: 0x03030701,
    },
    arm64: {
      opcodes: [0x00], # ARM64 BL
      address_size: 4,
      instruction_size: 4,
      xz_id: nil, # Not yet in XZ
      seven_zip_id: 0x03030601,
    },
    powerpc: {
      opcodes: [0x48, 0x18], # PowerPC branch instructions
      address_size: 4,
      instruction_size: 4,
      xz_id: 0x05,
      seven_zip_id: 0x03030205,
    },
    ia64: {
      opcodes: [0x04, 0x05, 0x06, 0x07, 0x08], # IA64 branches
      address_size: 4,
      instruction_size: 4,
      xz_id: 0x06,
      seven_zip_id: 0x03030401,
    },
    sparc: {
      opcodes: [0x04, 0x06, 0x07], # SPARC call/branch
      address_size: 4,
      instruction_size: 4,
      xz_id: 0x09,
      seven_zip_id: 0x03030805,
    },
  }.freeze

  # @return [Symbol] Architecture identifier
  attr_reader :architecture

  # Initialize BCJ filter for specific architecture
  #
  # @param architecture [Symbol] Target architecture (:x86, :arm,
  #   :armthumb, :arm64, :powerpc, :ia64, :sparc)
  # @raise [ArgumentError] If architecture is not supported
  def initialize(architecture:)
    unless CONFIG.key?(architecture)
      raise ArgumentError, "Unsupported BCJ architecture: #{architecture}. " \
                           "Supported: #{CONFIG.keys.join(', ')}"
    end

    @architecture = architecture
    @config = CONFIG[architecture]
    super(architecture: architecture, name: "BCJ-#{architecture.to_s.upcase}")
  end

  # Get filter ID for specific format
  #
  # @param format [Symbol] Format identifier (:seven_zip, :xz)
  # @return [Integer] Format-specific filter ID
  # @raise [ArgumentError] If format is not supported
  # @raise [NotImplementedError] If architecture not supported in format
  def id_for_format(format)
    case format
    when :seven_zip
      @config[:seven_zip_id]
    when :xz
      id = @config[:xz_id]
      if id.nil?
        raise NotImplementedError,
              "#{@architecture} BCJ not yet supported in XZ format"
      end

      id
    else
      raise ArgumentError,
            "Unknown format: #{format}. Supported: :seven_zip, :xz"
    end
  end

  # Encode (preprocess) data for compression
  #
  # Scans for branch/call opcodes and converts the relative address that
  # follows each of them to an absolute address. Only address fields
  # whose most significant byte is 0x00 or 0xFF are converted, and the
  # converted value is wrapped so that its most significant byte is
  # again 0x00 or 0xFF. That keeps the transformation invertible:
  # decode(encode(data, pos), pos) == data.
  #
  # @param data [String] Binary executable data
  # @param position [Integer] Current stream position (default: 0)
  # @return [String] Encoded binary data (a new string; +data+ is not
  #   modified)
  def encode(data, position = 0)
    convert(data, position, 1)
  end

  # Decode (postprocess) data after decompression
  #
  # Reverses #encode by converting absolute addresses back to relative
  # addresses. The same fields are selected as during encoding because
  # the selection only depends on bytes that encoding leaves in the
  # "most significant byte is 0x00 or 0xFF" class.
  #
  # @param data [String] Binary executable data
  # @param position [Integer] Current stream position (default: 0)
  # @return [String] Decoded binary data (a new string; +data+ is not
  #   modified)
  def decode(data, position = 0)
    convert(data, position, -1)
  end

  class << self
    # Get metadata about this filter
    #
    # @return [Hash] Filter metadata: +:name+, +:description+,
    #   +:supported_architectures+ (the CONFIG keys) and
    #   +:architectures+ (display label per architecture)
    def metadata
      {
        name: "BCJ",
        description: "Branch/Call/Jump converter for executable files",
        supported_architectures: CONFIG.keys,
        architectures: {
          x86: "x86/x86-64",
          arm: "ARM 32-bit",
          armthumb: "ARM Thumb",
          arm64: "ARM 64-bit",
          powerpc: "PowerPC",
          ia64: "IA-64 (Itanium)",
          sparc: "SPARC",
        },
      }
    end
  end

  private

  # Shared scan loop for #encode and #decode.
  #
  # @param data [String] Input bytes
  # @param position [Integer] Stream position of the first input byte
  # @param direction [Integer] +1 to add the instruction offset
  #   (encode), -1 to subtract it (decode)
  # @return [String] Converted copy of +data+
  def convert(data, position, direction)
    return data.dup if data.bytesize < @config[:instruction_size]

    result = data.b
    opcodes = @config[:opcodes]
    # Bytes occupied by the opcode plus its address field.
    field_span = 1 + @config[:address_size]
    # Never step into the address field that was just rewritten,
    # otherwise encoder and decoder could see different "opcode" bytes.
    stride = [@config[:instruction_size], field_span].max
    # Last index at which a complete opcode + address still fits.
    last_start = result.bytesize - field_span

    i = 0
    while i <= last_start
      unless opcodes.include?(result.getbyte(i))
        i += 1
        next
      end

      stored = extract_address(result, i + 1)
      # The test is applied to the stored value in both directions, so
      # the decoder converts exactly the fields the encoder converted.
      if valid_relative_address?(stored)
        offset = position + i + @config[:instruction_size]
        write_address(result, i + 1, wrap_address(stored + (direction * offset)))
      end

      i += stride
    end

    result
  end

  # Extract address from data at offset (little-endian)
  #
  # @param data [String] Binary data
  # @param offset [Integer] Starting position
  # @return [Integer] Address value, interpreted as a signed integer of
  #   +address_size+ bytes
  def extract_address(data, offset)
    size = @config[:address_size]
    unsigned = data.byteslice(offset, size).bytes.each_with_index.sum do |byte, idx|
      byte << (8 * idx)
    end

    sign_bit = 1 << ((8 * size) - 1)
    unsigned >= sign_bit ? unsigned - (sign_bit << 1) : unsigned
  end

  # Write address to data at offset (little-endian)
  #
  # Only the low +address_size+ bytes of +value+ are written, so
  # negative values are stored in two's complement.
  #
  # @param data [String] Binary data (modified in place)
  # @param offset [Integer] Starting position
  # @param value [Integer] Address value to write
  # @return [void]
  def write_address(data, offset, value)
    @config[:address_size].times do |i|
      data.setbyte(offset + i, value & 0xFF)
      value >>= 8
    end
  end

  # Check if address is a valid relative address
  #
  # Valid relative addresses have a most significant byte of 0x00 or
  # 0xFF, indicating small positive or negative offsets.
  #
  # @param value [Integer] Address value to check
  # @return [Boolean] True if valid relative address
  def valid_relative_address?(value)
    top_shift = 8 * (@config[:address_size] - 1)
    [0x00, 0xFF].include?((value >> top_shift) & 0xFF)
  end

  # Wrap a converted address so it stays a valid relative address.
  #
  # Keeps the bytes below the most significant one and fills the most
  # significant byte with copies of the bit just above them (0x00 or
  # 0xFF). Conversion therefore maps the set of valid addresses onto
  # itself, which is what makes encode/decode exact inverses.
  #
  # @param value [Integer] Converted address (any Integer)
  # @return [Integer] Unsigned value of +address_size+ bytes whose most
  #   significant byte is 0x00 or 0xFF
  def wrap_address(value)
    top_shift = 8 * (@config[:address_size] - 1)
    low = value & ((1 << top_shift) - 1)
    value[top_shift].zero? ? low : low | (0xFF << top_shift)
  end
end
|
|
278
|
+
end
|
|
279
|
+
end
|