omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2024 Ribose Inc.
|
|
5
|
+
#
|
|
6
|
+
# This file is part of Omnizip.
|
|
7
|
+
#
|
|
8
|
+
# Omnizip is a pure Ruby port of 7-Zip compression algorithms.
|
|
9
|
+
# Based on the 7-Zip LZMA SDK by Igor Pavlov.
|
|
10
|
+
#
|
|
11
|
+
# This library is free software; you can redistribute it and/or
|
|
12
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
13
|
+
# License as published by the Free Software Foundation; either
|
|
14
|
+
# version 2.1 of the License, or (at your option) any later version.
|
|
15
|
+
#
|
|
16
|
+
# See the COPYING file for the complete text of the license.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
module Omnizip
  module Filters
    # Numeric constants shared by the BCJ2 filter code: stream layout,
    # the x86 opcodes of interest, and the range coder parameters.
    module Bcj2Constants
      # BCJ2 works with this many separate streams.
      NUM_STREAMS = 4

      # Position of each stream:
      #   STREAM_MAIN - main data stream (non-convertible bytes)
      #   STREAM_CALL - CALL instruction addresses (E8)
      #   STREAM_JUMP - JUMP instruction addresses (E9)
      #   STREAM_RC   - range coder probability stream
      STREAM_MAIN, STREAM_CALL, STREAM_JUMP, STREAM_RC = 0, 1, 2, 3

      # x86 opcode bytes for the CALL and JUMP instructions.
      OPCODE_CALL = 0xE8
      OPCODE_JUMP = 0xE9

      # An x86 address occupies 4 bytes (little-endian).
      ADDRESS_SIZE = 4

      # Range coder: normalization threshold for the range (1 << 24).
      TOP_VALUE = 0x0100_0000
      # Range coder: number of bits in a probability model.
      BIT_MODEL_TOTAL_BITS = 11
      # Range coder: total probability scale (1 << BIT_MODEL_TOTAL_BITS).
      BIT_MODEL_TOTAL = 2**BIT_MODEL_TOTAL_BITS
      # Range coder: shift applied when a probability is updated.
      MOVE_BITS = 5

      # Size of the probability table:
      #   256 byte-specific models used when processing E8,
      #   plus 2 models for the general cases (not E8/E9, or 0F8x pattern).
      NUM_PROBS = 256 + 2

      # Every probability starts at 50% (half of BIT_MODEL_TOTAL).
      INITIAL_PROB = BIT_MODEL_TOTAL / 2
    end
  end
end
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2024 Ribose Inc.
|
|
5
|
+
#
|
|
6
|
+
# This file is part of Omnizip.
|
|
7
|
+
#
|
|
8
|
+
# Omnizip is a pure Ruby port of 7-Zip compression algorithms.
|
|
9
|
+
# Based on the 7-Zip LZMA SDK by Igor Pavlov.
|
|
10
|
+
#
|
|
11
|
+
# This library is free software; you can redistribute it and/or
|
|
12
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
13
|
+
# License as published by the Free Software Foundation; either
|
|
14
|
+
# version 2.1 of the License, or (at your option) any later version.
|
|
15
|
+
#
|
|
16
|
+
# See the COPYING file for the complete text of the license.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
require_relative "constants"
|
|
20
|
+
require_relative "stream_data"
|
|
21
|
+
|
|
22
|
+
module Omnizip
|
|
23
|
+
module Filters
|
|
24
|
+
# BCJ2 decoder - reconstructs original data from 4 streams.
|
|
25
|
+
#
|
|
26
|
+
# Combines:
|
|
27
|
+
# - Main stream (non-convertible bytes)
|
|
28
|
+
# - Call stream (CALL/E8 addresses)
|
|
29
|
+
# - Jump stream (JUMP/E9 addresses)
|
|
30
|
+
# - RC stream (range coder probability data)
|
|
31
|
+
class Bcj2Decoder
|
|
32
|
+
include Bcj2Constants
|
|
33
|
+
|
|
34
|
+
attr_reader :ip
|
|
35
|
+
|
|
36
|
+
# Initialize decoder.
|
|
37
|
+
#
|
|
38
|
+
# @param streams [Bcj2StreamData] The 4 input streams
|
|
39
|
+
# @param position [Integer] Starting instruction pointer
|
|
40
|
+
def initialize(streams, position = 0)
  # Inputs: the stream container and the starting instruction pointer.
  @streams = streams
  @ip = position

  # Range decoder registers; both start at zero here.
  @range = @code = 0

  # One probability slot per model, each starting at INITIAL_PROB.
  @probs = Array.new(NUM_PROBS) { INITIAL_PROB }

  # Read cursors into the main, call, jump and RC streams.
  @main_pos = @call_pos = @jump_pos = @rc_pos = 0
end
|
|
53
|
+
|
|
54
|
+
# Decode 4 streams back to original data.
|
|
55
|
+
#
|
|
56
|
+
# @return [String] Decoded binary data
|
|
57
|
+
def decode
  # Rebuilds the output by walking the main stream one byte at a time.
  # Every main-stream byte is emitted verbatim. When the byte is a CALL
  # or JUMP opcode and the range decoder yields a 1 bit, the 4-byte
  # address obtained from read_address is emitted right after it.
  #
  # Returns the decoded data as a binary-encoded String.
  output = String.new(encoding: Encoding::BINARY)
  init_range_decoder

  while @main_pos < @streams.main.bytesize
    byte = @streams.main.getbyte(@main_pos)
    @main_pos += 1

    # Appending the Integer directly adds one raw byte to a binary
    # string, avoiding a temporary one-character String per input byte.
    output << byte

    # The range decoder is only consulted for E8/E9 opcodes; `&&`
    # short-circuits so no bit is consumed for ordinary bytes.
    if [OPCODE_CALL, OPCODE_JUMP].include?(byte) && read_bit(get_prob_index(byte))
      # Convertible: opcode plus 4 address bytes were produced.
      output << encode_int32_le(read_address(byte))
      @ip += 5
    else
      # Ordinary byte, or an opcode flagged as not convertible.
      @ip += 1
    end
  end

  output
end
|
|
90
|
+
|
|
91
|
+
private
|
|
92
|
+
|
|
93
|
+
# Initialize range decoder by reading first 5 bytes from RC stream.
|
|
94
|
+
#
|
|
95
|
+
# @return [void]
|
|
96
|
+
def init_range_decoder
|
|
97
|
+
@range = 0xFFFFFFFF
|
|
98
|
+
@code = 0
|
|
99
|
+
|
|
100
|
+
5.times do
|
|
101
|
+
break if @rc_pos >= @streams.rc.bytesize
|
|
102
|
+
|
|
103
|
+
@code = (@code << 8) | @streams.rc.getbyte(@rc_pos)
|
|
104
|
+
@rc_pos += 1
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Read a single bit from range coder.
|
|
109
|
+
#
|
|
110
|
+
# @param prob_index [Integer] Probability model index
|
|
111
|
+
# @return [Boolean] Decoded bit (true = 1, false = 0)
|
|
112
|
+
def read_bit(prob_index) # rubocop:disable Naming/PredicateMethod
|
|
113
|
+
normalize_range
|
|
114
|
+
|
|
115
|
+
prob = @probs[prob_index]
|
|
116
|
+
bound = (@range >> BIT_MODEL_TOTAL_BITS) * prob
|
|
117
|
+
|
|
118
|
+
if @code < bound
|
|
119
|
+
# Bit is 0
|
|
120
|
+
@range = bound
|
|
121
|
+
@probs[prob_index] += (BIT_MODEL_TOTAL - prob) >> MOVE_BITS
|
|
122
|
+
false
|
|
123
|
+
else
|
|
124
|
+
# Bit is 1
|
|
125
|
+
@range -= bound
|
|
126
|
+
@code -= bound
|
|
127
|
+
@probs[prob_index] -= prob >> MOVE_BITS
|
|
128
|
+
true
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Normalize range decoder if needed.
|
|
133
|
+
#
|
|
134
|
+
# @return [void]
|
|
135
|
+
def normalize_range
|
|
136
|
+
while @range < TOP_VALUE
|
|
137
|
+
@range <<= 8
|
|
138
|
+
next_byte = if @rc_pos < @streams.rc.bytesize
|
|
139
|
+
@streams.rc.getbyte(@rc_pos)
|
|
140
|
+
else
|
|
141
|
+
0
|
|
142
|
+
end
|
|
143
|
+
@code = (@code << 8) | next_byte
|
|
144
|
+
@rc_pos += 1 if @rc_pos < @streams.rc.bytesize
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
# Get probability model index for a byte.
|
|
149
|
+
#
|
|
150
|
+
# @param byte [Integer] Byte value
|
|
151
|
+
# @return [Integer] Probability model index
|
|
152
|
+
def get_prob_index(byte)
|
|
153
|
+
# Use byte-specific model for E8, general model for E9
|
|
154
|
+
byte == OPCODE_CALL ? (2 + (@ip & 0xFF)) : 0
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
# Read 32-bit address from call or jump stream.
|
|
158
|
+
#
|
|
159
|
+
# @param opcode [Integer] Opcode (E8 or E9)
|
|
160
|
+
# @return [Integer] Converted address
|
|
161
|
+
def read_address(opcode)
|
|
162
|
+
stream_pos = opcode == OPCODE_CALL ? @call_pos : @jump_pos
|
|
163
|
+
stream = opcode == OPCODE_CALL ? @streams.call : @streams.jump
|
|
164
|
+
|
|
165
|
+
# Read 4 bytes (big-endian in stream, stored as absolute)
|
|
166
|
+
addr = 0
|
|
167
|
+
4.times do |i|
|
|
168
|
+
break if stream_pos >= stream.bytesize
|
|
169
|
+
|
|
170
|
+
addr |= stream.getbyte(stream_pos) << (24 - (i * 8))
|
|
171
|
+
stream_pos += 1
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
# Update stream position
|
|
175
|
+
if opcode == OPCODE_CALL
|
|
176
|
+
@call_pos = stream_pos
|
|
177
|
+
else
|
|
178
|
+
@jump_pos = stream_pos
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
# Convert back to relative
|
|
182
|
+
addr - (@ip + 5)
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
# Encode 32-bit integer as little-endian bytes.
|
|
186
|
+
#
|
|
187
|
+
# @param value [Integer] Value to encode
|
|
188
|
+
# @return [String] 4-byte string
|
|
189
|
+
def encode_int32_le(value)
|
|
190
|
+
unsigned = value & 0xFFFFFFFF
|
|
191
|
+
[
|
|
192
|
+
unsigned & 0xFF,
|
|
193
|
+
(unsigned >> 8) & 0xFF,
|
|
194
|
+
(unsigned >> 16) & 0xFF,
|
|
195
|
+
(unsigned >> 24) & 0xFF,
|
|
196
|
+
].pack("C*")
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
end
|
|
200
|
+
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2024 Ribose Inc.
|
|
5
|
+
#
|
|
6
|
+
# This file is part of Omnizip.
|
|
7
|
+
#
|
|
8
|
+
# Omnizip is a pure Ruby port of 7-Zip compression algorithms.
|
|
9
|
+
# Based on the 7-Zip LZMA SDK by Igor Pavlov.
|
|
10
|
+
#
|
|
11
|
+
# This library is free software; you can redistribute it and/or
|
|
12
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
13
|
+
# License as published by the Free Software Foundation; either
|
|
14
|
+
# version 2.1 of the License, or (at your option) any later version.
|
|
15
|
+
#
|
|
16
|
+
# See the COPYING file for the complete text of the license.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
require_relative "constants"
|
|
20
|
+
require_relative "stream_data"
|
|
21
|
+
|
|
22
|
+
module Omnizip
  module Filters
    # BCJ2 encoder - intended to split data into 4 streams.
    #
    # NOTE: BCJ2 encoding is not implemented yet; #encode always raises.
    # Encoding is only needed when compressing, whereas decoding
    # (decompression) is the more common use case.
    #
    # A real implementation would need:
    # - A range encoder
    # - Probability model management
    # - Stream splitting logic
    # - Relative-to-absolute address conversion
    #
    # This is planned for a future version.
    class Bcj2Encoder
      include Bcj2Constants

      # Initialize encoder.
      #
      # @param data [String] Binary data to encode
      # @param position [Integer] Starting instruction pointer
      def initialize(data, position = 0)
        @position = position
        @data = data
      end

      # Encode data into 4 streams.
      #
      # @raise [NotImplementedError] Always; BCJ2 encoding is not yet
      #   implemented
      # @return [Bcj2StreamData] The 4 output streams (once implemented)
      def encode
        message = [
          "BCJ2 encoding is not yet implemented.",
          "BCJ2 is primarily used for decompression.",
          "For compression, use the simpler BCJ-x86 filter.",
        ].join(" ")

        raise NotImplementedError, message
      end
    end
  end
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2024 Ribose Inc.
|
|
5
|
+
#
|
|
6
|
+
# This file is part of Omnizip.
|
|
7
|
+
#
|
|
8
|
+
# Omnizip is a pure Ruby port of 7-Zip compression algorithms.
|
|
9
|
+
# Based on the 7-Zip LZMA SDK by Igor Pavlov.
|
|
10
|
+
#
|
|
11
|
+
# This library is free software; you can redistribute it and/or
|
|
12
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
13
|
+
# License as published by the Free Software Foundation; either
|
|
14
|
+
# version 2.1 of the License, or (at your option) any later version.
|
|
15
|
+
#
|
|
16
|
+
# See the COPYING file for the complete text of the license.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
require_relative "constants"
|
|
20
|
+
|
|
21
|
+
module Omnizip
  module Filters
    # Container for the 4 BCJ2 streams.
    #
    # BCJ2 splits data into:
    # - Main stream: Non-convertible bytes
    # - Call stream: CALL (E8) instruction addresses
    # - Jump stream: JUMP (E9) instruction addresses
    # - RC stream: Range coder probability data
    #
    # Streams can be reached by name (+main+, +call+, +jump+, +rc+) or by
    # the STREAM_* index constants through +[]+ / +[]=+.
    class Bcj2StreamData
      include Bcj2Constants

      attr_accessor :main, :call, :jump, :rc

      # Create four empty binary-encoded streams.
      #
      # @return [Bcj2StreamData] New stream data instance
      def initialize
        @main, @call, @jump, @rc =
          Array.new(4) { String.new(encoding: Encoding::BINARY) }
      end

      # Get stream by index.
      #
      # @param index [Integer] Stream index (0-3)
      # @return [String] Stream data
      # @raise [ArgumentError] If index is invalid
      def [](index)
        instance_variable_get(stream_ivar(index))
      end

      # Set stream by index.
      #
      # @param index [Integer] Stream index (0-3)
      # @param data [String] Stream data
      # @return [String] The data that was set
      # @raise [ArgumentError] If index is invalid
      def []=(index, data)
        instance_variable_set(stream_ivar(index), data)
      end

      # Get all streams as an array.
      #
      # @return [Array<String>] The streams in main, call, jump, rc order
      def to_a
        [@main, @call, @jump, @rc]
      end

      # Check if all streams are empty.
      #
      # @return [Boolean] True if all streams are empty
      def empty?
        [@main, @call, @jump, @rc].all?(&:empty?)
      end

      private

      # Map a stream index onto the instance variable that stores it.
      #
      # @param index [Integer] Stream index (0-3)
      # @return [Symbol] Instance variable name
      # @raise [ArgumentError] If index is invalid
      def stream_ivar(index)
        case index
        when STREAM_MAIN
          :@main
        when STREAM_CALL
          :@call
        when STREAM_JUMP
          :@jump
        when STREAM_RC
          :@rc
        else
          raise ArgumentError, "Invalid stream index: #{index}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2024 Ribose Inc.
|
|
5
|
+
#
|
|
6
|
+
# This file is part of Omnizip.
|
|
7
|
+
#
|
|
8
|
+
# Omnizip is a pure Ruby port of 7-Zip compression algorithms.
|
|
9
|
+
# Based on the 7-Zip LZMA SDK by Igor Pavlov.
|
|
10
|
+
#
|
|
11
|
+
# This library is free software; you can redistribute it and/or
|
|
12
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
13
|
+
# License as published by the Free Software Foundation; either
|
|
14
|
+
# version 2.1 of the License, or (at your option) any later version.
|
|
15
|
+
#
|
|
16
|
+
# See the COPYING file for the complete text of the license.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
require_relative "filter_base"
|
|
20
|
+
require_relative "bcj2/constants"
|
|
21
|
+
require_relative "bcj2/stream_data"
|
|
22
|
+
require_relative "bcj2/decoder"
|
|
23
|
+
require_relative "bcj2/encoder"
|
|
24
|
+
|
|
25
|
+
module Omnizip
  module Filters
    # BCJ2 filter for x86/x64 executables (4-stream variant).
    #
    # BCJ2 is an advanced version of BCJ that splits x86 executable code
    # into 4 separate streams for maximum compression:
    # - Main stream: Non-convertible bytes
    # - Call stream: CALL (0xE8) instruction addresses
    # - Jump stream: JUMP (0xE9) instruction addresses
    # - RC stream: Range coder probability data
    #
    # This gives better compression than plain BCJ at the cost of extra
    # complexity: the archive format has to store and retrieve the 4
    # streams separately.
    #
    # NOTE: Only decoding (decompression) is implemented at present.
    # For compression use cases, the simpler BCJ-x86 filter is recommended.
    class Bcj2 < FilterBase
      # Get metadata about this filter.
      #
      # @return [Hash] Filter metadata
      def self.metadata
        {
          name: "BCJ2",
          description: "Advanced 4-stream Branch/Call/Jump " \
                       "converter for x86/x64 executables",
          architecture: "x86/x64",
          streams: 4,
          complexity: "high",
          compression_quality: "maximum",
          limitations: "Encoding not yet implemented",
        }
      end

      # Encoding is not supported for BCJ2; this method always raises.
      #
      # @param _data [String] Binary data to encode (unused)
      # @param _position [Integer] Current stream position (unused)
      # @raise [NotImplementedError] BCJ2 encoding not yet implemented
      # @return [String] Encoded binary data (once implemented)
      def encode(_data, _position = 0)
        message = [
          "BCJ2 encoding is not yet implemented.",
          "Use the simpler BCJ-x86 filter for compression,",
          "or wait for a future version with BCJ2 encoding support.",
        ].join(" ")

        raise NotImplementedError, message
      end

      # Decode (postprocess) BCJ2 data after decompression.
      #
      # The 4 BCJ2 streams must be supplied together in a Bcj2StreamData
      # object; any other argument is rejected. In practice the archive
      # format reader (e.g., the 7z reader) is expected to build that
      # object from the compressed data.
      #
      # @param data [Bcj2StreamData] The 4 BCJ2 streams
      # @param position [Integer] Current stream position
      # @raise [ArgumentError] If data is not a Bcj2StreamData object
      # @return [String] Decoded binary data
      def decode(data, position = 0)
        return Bcj2Decoder.new(data, position).decode if data.is_a?(Bcj2StreamData)

        raise ArgumentError,
              "BCJ2 decode requires a Bcj2StreamData object with 4 streams. " \
              "This is typically handled by the archive format reader."
      end
    end
  end
end
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2024 Ribose Inc.
|
|
5
|
+
#
|
|
6
|
+
# This file is part of Omnizip.
|
|
7
|
+
#
|
|
8
|
+
# Omnizip is a pure Ruby port of 7-Zip compression algorithms.
|
|
9
|
+
# Based on the 7-Zip LZMA SDK by Igor Pavlov.
|
|
10
|
+
#
|
|
11
|
+
# This library is free software; you can redistribute it and/or
|
|
12
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
13
|
+
# License as published by the Free Software Foundation; either
|
|
14
|
+
# version 2.1 of the License, or (at your option) any later version.
|
|
15
|
+
#
|
|
16
|
+
# See the COPYING file for the complete text of the license.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
require_relative "filter_base"
|
|
20
|
+
|
|
21
|
+
module Omnizip
  module Filters
    # BCJ filter for 32-bit ARM executables.
    #
    # This filter preprocesses ARM machine code by converting relative
    # addresses in BL (Branch and Link - 0xEB) instructions to absolute
    # addresses. ARM uses 4-byte aligned instructions with little-endian
    # encoding.
    #
    # The filter improves compression by making branch targets
    # position-independent. The offset in ARM BL instructions is stored
    # as word offset (divided by 4), and is relative to PC+8.
    class BcjArm < FilterBase
      # ARM BL (Branch and Link) opcode
      OPCODE_BL = 0xEB

      # Size of ARM instruction (4 bytes, little-endian)
      INSTRUCTION_SIZE = 4

      # Offset mask (24-bit offset in BL instruction)
      OFFSET_MASK = 0x00FFFFFF

      # Byte distance between a BL instruction's own address and the PC
      # value its offset is relative to (PC+8).
      PC_OFFSET = 8

      # Encode (preprocess) ARM executable data for compression.
      #
      # Scans every aligned 4-byte word for a BL (0xEB) opcode in its last
      # byte and adds the word address of (instruction address + 8) to the
      # 24-bit offset field, modulo 2**24.
      #
      # For example, with position 0 the word 0xEB000000 at offset 0
      # becomes 0xEB000002.
      #
      # @param data [String] Binary executable data
      # @param position [Integer] Current stream position
      # @return [String] Encoded binary data
      def encode(data, position = 0)
        convert(data, position, encoding: true)
      end

      # Decode (postprocess) ARM executable data after decompression.
      #
      # Reverses #encode by subtracting the same word address from the
      # 24-bit offset field of every BL instruction.
      #
      # @param data [String] Binary executable data
      # @param position [Integer] Current stream position
      # @return [String] Decoded binary data
      def decode(data, position = 0)
        convert(data, position, encoding: false)
      end

      class << self
        # Get metadata about this filter.
        #
        # @return [Hash] Filter metadata
        def metadata
          {
            name: "BCJ-ARM",
            description: "Branch converter for 32-bit ARM executables",
            architecture: "ARM (32-bit)",
            alignment: 4,
            endian: "little",
          }
        end
      end

      private

      # Shared implementation of #encode and #decode.
      #
      # Inputs shorter than one instruction are returned as a plain copy.
      # Otherwise a binary copy is converted; trailing bytes that do not
      # form a complete 4-byte word are left untouched.
      #
      # @param data [String] Binary executable data
      # @param position [Integer] Stream position of the first byte of data
      # @param encoding [Boolean] true to add the word address (encode),
      #   false to subtract it (decode)
      # @return [String] Converted binary data
      def convert(data, position, encoding:)
        return data.dup if data.bytesize < INSTRUCTION_SIZE

        result = data.b
        limit = data.bytesize & ~(INSTRUCTION_SIZE - 1)

        (0...limit).step(INSTRUCTION_SIZE) do |i|
          # The opcode is the most significant (last) byte of the word.
          next unless result.getbyte(i + 3) == OPCODE_BL

          # Word address the branch offset is relative to (PC+8).
          word_offset = (position + i + PC_OFFSET) >> 2
          word_offset = -word_offset unless encoding

          # Only the low 24 bits survive the mask, so the opcode byte in
          # the extracted word does not affect the arithmetic.
          instruction = extract_uint32_le(result, i) + word_offset
          instruction = (instruction & OFFSET_MASK) | (OPCODE_BL << 24)

          write_uint32_le(result, i, instruction)
        end

        result
      end

      # Extract an unsigned 32-bit little-endian integer from data.
      #
      # @param data [String] Binary data
      # @param offset [Integer] Starting position
      # @return [Integer] Unsigned 32-bit integer
      def extract_uint32_le(data, offset)
        data.byteslice(offset, INSTRUCTION_SIZE).unpack1("V")
      end

      # Write an unsigned 32-bit little-endian integer to data.
      #
      # @param data [String] Binary data (modified in place)
      # @param offset [Integer] Starting position
      # @param value [Integer] 32-bit integer to write
      # @return [void]
      def write_uint32_le(data, offset, value)
        value &= 0xFFFFFFFF
        INSTRUCTION_SIZE.times do |n|
          data.setbyte(offset + n, (value >> (8 * n)) & 0xFF)
        end
      end
    end
  end
end
|