omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../../../algorithms/lzma/constants"
|
|
24
|
+
require_relative "../../../algorithms/lzma/match_finder_config"
|
|
25
|
+
|
|
26
|
+
module Omnizip
|
|
27
|
+
module Implementations
|
|
28
|
+
module SevenZip
|
|
29
|
+
module LZMA
|
|
30
|
+
# 7-Zip LZMA SDK match finder implementation.
|
|
31
|
+
#
|
|
32
|
+
# This is the original SdkMatchFinder moved from algorithms/lzma/sdk_match_finder.rb
|
|
33
|
+
# to the new namespace structure.
|
|
34
|
+
#
|
|
35
|
+
# Ported from 7-Zip LZMA SDK by Igor Pavlov.
|
|
36
|
+
class MatchFinder
|
|
37
|
+
include Omnizip::Algorithms::LZMA::Constants
|
|
38
|
+
|
|
39
|
+
# Represents a match found in the dictionary
|
|
40
|
+
class Match
  # length   - length of the match found
  # distance - how far back the earlier occurrence starts
  #            (the finder passes +pos - prev_pos+)
  attr_reader :length, :distance

  # Build a read-only match record.
  #
  # @param length [Integer] Match length
  # @param distance [Integer] Backward distance to the matched data
  def initialize(length, distance)
    @length, @distance = length, distance
  end
end
|
|
48
|
+
|
|
49
|
+
attr_reader :config
|
|
50
|
+
|
|
51
|
+
# Initialize the SDK-compatible match finder
|
|
52
|
+
#
|
|
53
|
+
# @param config [MatchFinderConfig] Configuration object
|
|
54
|
+
def initialize(config)
  # Keep the config object itself (exposed through the +config+ reader) and
  # copy its settings into instance variables.
  @config = config
  @window_size = config.window_size
  @max_match_length = config.max_match_length
  @chain_length = config.chain_length
  @lazy_matching = config.lazy_matching

  # Hash table: maps hash value to position
  # SDK uses separate hash2 and hash3 tables, but we simplify
  # to single hash table with chaining
  @hash_table = {}

  # Hash chain: stores previous positions for each hash value
  @hash_chain = {}

  # Slot for a match deferred by lazy matching. find_longest_match reads it
  # before anything assigns it, so start it at nil explicitly - the same
  # value #reset restores.
  @lazy_match = nil

  # CRC table for hash computation (SDK uses CRC)
  init_crc_table
end
|
|
72
|
+
|
|
73
|
+
# Find the longest match at the given position
|
|
74
|
+
#
|
|
75
|
+
# Implements SDK's GetMatches() function from LzFind.c
|
|
76
|
+
#
|
|
77
|
+
# @param data [String, Array<Integer>] Input data
|
|
78
|
+
# @param pos [Integer] Current position in data
|
|
79
|
+
# @return [Match, nil] Best match or nil if no match found
|
|
80
|
+
def find_longest_match(data, pos)
  # Nothing to search at or past the end of the data, or once fewer than
  # MATCH_LEN_MIN elements remain.
  return nil if pos >= data.size
  return nil if data.size - pos < MATCH_LEN_MIN

  if @lazy_matching && @lazy_match
    # A previous call deferred a longer match; take it out of the slot.
    deferred = @lazy_match
    deferred_pos = @lazy_pos
    @lazy_match = nil
    @lazy_pos = nil

    if deferred_pos == pos
      # The deferred match was computed for exactly this position. The
      # lookahead that produced it did not index the position, so do it
      # now; otherwise this position never enters the hash chains.
      update_hash(data, pos)
      return deferred
    end
    # Otherwise the caller moved elsewhere: the deferred distance refers to
    # a different position, so drop it and run a normal search below.
  end

  best_match = find_best_match(data, pos)

  if @lazy_matching && best_match && pos + 1 < data.size
    # Lazy matching: peek one position ahead for a strictly longer match.
    next_match = find_best_match(data, pos + 1)
    if next_match && next_match.length > best_match.length
      # Report "no match" here and hand the longer match out on the next
      # call, remembering which position it belongs to.
      @lazy_match = next_match
      @lazy_pos = pos + 1
      # The current position must still be indexed, after both searches so
      # neither of them can see it.
      update_hash(data, pos)
      return nil
    end
  end

  # CRITICAL: Update hash AFTER finding matches
  # This ensures the current position is available for future matches
  update_hash(data, pos)
  best_match
end
|
|
110
|
+
|
|
111
|
+
# Reset the match finder state
|
|
112
|
+
#
|
|
113
|
+
# @return [void]
|
|
114
|
+
def reset
  # Forget every indexed position and any match deferred by lazy matching.
  [@hash_table, @hash_chain].each(&:clear)
  @lazy_match = nil
end
|
|
119
|
+
|
|
120
|
+
private
|
|
121
|
+
|
|
122
|
+
# Find best match at position (SDK's GetMatches core logic)
|
|
123
|
+
#
|
|
124
|
+
# Searches both 2-byte and 3-byte hash chains for the best match.
|
|
125
|
+
#
|
|
126
|
+
# @param data [String, Array<Integer>] Input data
|
|
127
|
+
# @param pos [Integer] Current position
|
|
128
|
+
# @return [Match, nil] Best match or nil
|
|
129
|
+
def find_best_match(data, pos)
  best_match = nil
  # Start one below the minimum so only matches of at least
  # MATCH_LEN_MIN bytes are ever reported.
  best_length = MATCH_LEN_MIN - 1

  hashes = compute_hashes(data, pos)
  return nil if hashes.empty?

  # The 2-byte and 3-byte hash values share one table and can be equal;
  # walk each distinct chain only once.
  hashes.values.uniq.each do |hash_val|
    positions = @hash_chain[hash_val]
    next if positions.nil? || positions.empty?

    # Walk the chain from most recent to oldest, examining at most
    # @chain_length candidates (the role played by cutValue in the SDK).
    count = 0
    positions.reverse_each do |prev_pos|
      break if count >= @chain_length
      # An entry at or after pos cannot be a back-reference. Skip it
      # instead of abandoning the chain: older entries may still be usable.
      next if pos <= prev_pos
      # Entries are stored in ascending order, so everything older than
      # this one is even farther outside the window.
      break if pos - prev_pos > @window_size

      match_len = calculate_match_length(data, pos, prev_pos)

      if match_len > best_length
        best_length = match_len
        best_match = Match.new(match_len, pos - prev_pos)

        # A maximum-length match cannot be beaten; stop searching.
        break if best_length >= @max_match_length
      end

      count += 1
    end

    # If we found a full-length match, no need to check other hashes.
    break if best_length >= @max_match_length
  end

  best_match
end
|
|
167
|
+
|
|
168
|
+
# Compute hash value using SDK's algorithm
|
|
169
|
+
#
|
|
170
|
+
# SDK uses CRC-based hashing with multiple hash levels:
|
|
171
|
+
# - hash2: 2-byte hash
|
|
172
|
+
# - hash3: 3-byte hash
|
|
173
|
+
# - hash4: 4-byte hash (binary tree mode)
|
|
174
|
+
#
|
|
175
|
+
# We compute both 2-byte and 3-byte hashes and store matches
|
|
176
|
+
# in both hash tables to ensure matches are found regardless
|
|
177
|
+
# of which hash size is used at query time.
|
|
178
|
+
#
|
|
179
|
+
# @param data [String, Array<Integer>] Input data
|
|
180
|
+
# @param pos [Integer] Position to hash from
|
|
181
|
+
# @return [Integer, nil] Hash value (3-byte if available, else 2-byte); nil if fewer than 2 bytes remain
|
|
182
|
+
def compute_hash(data, pos)
  # Read bytes in place. Calling String#bytes here would copy the entire
  # input on every call. A String is measured in bytes (not characters)
  # because positions are byte offsets.
  if data.is_a?(String)
    total = data.bytesize
    byte_at = data.method(:getbyte)
  else
    total = data.size
    byte_at = data.method(:[])
  end

  # Fewer than 2 bytes remaining: no hash can be formed.
  return nil if pos + 2 > total

  # 2-byte hash: CRC[byte[0]] ^ byte[1]
  hash = @crc_table[byte_at.call(pos)] ^ byte_at.call(pos + 1)
  # 3-byte hash additionally mixes in byte[2] << 8 when it exists.
  hash ^= (byte_at.call(pos + 2) << 8) if pos + 3 <= total
  hash & 0xFFFF
end
|
|
197
|
+
|
|
198
|
+
# Compute both 2-byte and 3-byte hashes
|
|
199
|
+
#
|
|
200
|
+
# @param data [String, Array<Integer>] Input data
|
|
201
|
+
# @param pos [Integer] Position to hash from
|
|
202
|
+
# @return [Hash{Symbol => Integer}] Hash with :hash2 and, when 3+ bytes remain, :hash3 (empty if fewer than 2 bytes remain)
|
|
203
|
+
def compute_hashes(data, pos)
  # Read bytes in place. Calling String#bytes here would copy the entire
  # input on every call. A String is measured in bytes (not characters)
  # because positions are byte offsets.
  if data.is_a?(String)
    total = data.bytesize
    byte_at = data.method(:getbyte)
  else
    total = data.size
    byte_at = data.method(:[])
  end

  hashes = {}
  # Fewer than 2 bytes remaining: nothing to hash.
  return hashes if pos + 2 > total

  # 2-byte hash: CRC[byte[0]] ^ byte[1]
  mixed = @crc_table[byte_at.call(pos)] ^ byte_at.call(pos + 1)
  hashes[:hash2] = mixed & 0xFFFF

  # 3-byte hash (only if 3+ bytes available): also mixes in byte[2] << 8.
  hashes[:hash3] = (mixed ^ (byte_at.call(pos + 2) << 8)) & 0xFFFF if pos + 3 <= total

  hashes
end
|
|
222
|
+
|
|
223
|
+
# Calculate match length between two positions
|
|
224
|
+
#
|
|
225
|
+
# SDK compares bytes until mismatch or max length
|
|
226
|
+
#
|
|
227
|
+
# @param data [String, Array<Integer>] Input data
|
|
228
|
+
# @param pos1 [Integer] First position
|
|
229
|
+
# @param pos2 [Integer] Second position
|
|
230
|
+
# @return [Integer] Length of match
|
|
231
|
+
def calculate_match_length(data, pos1, pos2)
  # Read bytes in place. Calling String#bytes here would copy the entire
  # input for every candidate. A String is measured in bytes (not
  # characters) because positions are byte offsets.
  if data.is_a?(String)
    total = data.bytesize
    byte_at = data.method(:getbyte)
  else
    total = data.size
    byte_at = data.method(:[])
  end

  # Never compare past the end of the input or beyond the configured cap.
  max_len = [total - pos1, @max_match_length].min
  length = 0
  length += 1 while length < max_len && byte_at.call(pos1 + length) == byte_at.call(pos2 + length)
  length
end
|
|
242
|
+
|
|
243
|
+
# Update hash table with new position
|
|
244
|
+
#
|
|
245
|
+
# Stores position in both 2-byte and 3-byte hash chains
|
|
246
|
+
# to ensure matches are found regardless of hash size used at query time.
|
|
247
|
+
#
|
|
248
|
+
# @param data [String, Array<Integer>] Input data
|
|
249
|
+
# @param pos [Integer] Position to add
|
|
250
|
+
# @return [void]
|
|
251
|
+
def update_hash(data, pos)
  hashes = compute_hashes(data, pos)
  return if hashes.empty?

  # Keep hash chains from growing too large.
  # SDK uses a cyclic buffer, we use simple truncation.
  max_chain = @chain_length * 2

  # Both hash values index the same table and can be equal; deduplicate so
  # a position is never appended twice to one chain.
  hashes.values.uniq.each do |hash_val|
    chain = (@hash_chain[hash_val] ||= [])
    chain << pos
    # One entry was added, so dropping the oldest one restores the cap.
    chain.shift if chain.size > max_chain
  end

  nil
end
|
|
265
|
+
|
|
266
|
+
# Initialize CRC table for hash computation
|
|
267
|
+
#
|
|
268
|
+
# SDK uses CRC32 table for hashing
|
|
269
|
+
#
|
|
270
|
+
# @return [void]
|
|
271
|
+
def init_crc_table
  # Standard reflected CRC-32 table (polynomial 0xEDB88320).
  #
  # Entries are kept at full width. Truncating each entry to its low 8 bits
  # left the 2-byte hash (CRC[b0] ^ b1) with only 256 possible values.
  # The hash functions in this class mask to 16 bits themselves.
  @crc_table = Array.new(256) do |i|
    8.times.reduce(i) do |crc, _|
      crc.odd? ? (crc >> 1) ^ 0xEDB88320 : crc >> 1
    end
  end
end
|
|
284
|
+
end
|
|
285
|
+
end
|
|
286
|
+
end
|
|
287
|
+
end
|
|
288
|
+
end
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
# Ported from 7-Zip SDK C/LzmaDec.c
|
|
24
|
+
# Direct port of the LZMA SDK range decoder for byte-for-byte compatibility
|
|
25
|
+
# with 7-Zip archives.
|
|
26
|
+
|
|
27
|
+
require_relative "../../../algorithms/lzma/constants"
|
|
28
|
+
require_relative "../../../algorithms/lzma/bit_model"
|
|
29
|
+
|
|
30
|
+
module Omnizip
|
|
31
|
+
module Implementations
|
|
32
|
+
module SevenZip
|
|
33
|
+
module LZMA
|
|
34
|
+
# Range decoder for 7-Zip SDK LZMA decompression
|
|
35
|
+
#
|
|
36
|
+
# This is a direct port of 7-Zip SDK's range decoder implementation
|
|
37
|
+
# from LzmaDec.c for guaranteed compatibility with 7-Zip archives.
|
|
38
|
+
#
|
|
39
|
+
# Reference: 7-Zip SDK, C/LzmaDec.c
|
|
40
|
+
class RangeDecoder
|
|
41
|
+
include Omnizip::Algorithms::LZMA::Constants
|
|
42
|
+
|
|
43
|
+
attr_reader :code
|
|
44
|
+
|
|
45
|
+
# Initialize the range decoder
|
|
46
|
+
#
|
|
47
|
+
# @param input_stream [IO] The input stream of encoded bytes
|
|
48
|
+
def initialize(input_stream)
  # Begin with an empty code register and the full 32-bit range, then
  # prime the code register from the head of the stream.
  @code = 0
  @range = 0xFFFF_FFFF
  @stream = input_stream
  init_decoder
end
|
|
54
|
+
|
|
55
|
+
# Decode a single bit using a probability model
|
|
56
|
+
#
|
|
57
|
+
# Ported from 7-Zip SDK IF_BIT_0/UPDATE_0/UPDATE_1 macros
|
|
58
|
+
# (LzmaDec.c lines 22-26)
|
|
59
|
+
#
|
|
60
|
+
# SDK pattern:
|
|
61
|
+
# #define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
|
|
62
|
+
# #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
|
|
63
|
+
# #define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
|
|
64
|
+
#
|
|
65
|
+
# @param model [BitModel] The probability model for this bit
|
|
66
|
+
# @return [Integer] The decoded bit value (0 or 1)
|
|
67
|
+
def decode_bit(model)
  prob = model.probability

  # NORMALIZE runs before the bound is computed (SDK IF_BIT_0 order).
  normalize
  bound = (@range >> 11) * prob

  if @code >= bound
    # UPDATE_1: the code lies in the upper part, so the bit is 1.
    @range = (@range - bound) & 0xFFFF_FFFF
    @code = (@code - bound) & 0xFFFF_FFFF
    model.instance_variable_set(:@probability, prob - (prob >> MOVE_BITS))
    return 1
  end

  # UPDATE_0: the code lies in the lower part, so the bit is 0.
  @range = bound & 0xFFFF_FFFF
  model.instance_variable_set(:@probability, prob + ((BIT_MODEL_TOTAL - prob) >> MOVE_BITS))
  0
end
|
|
91
|
+
|
|
92
|
+
# Decode bits directly without using probability model
|
|
93
|
+
#
|
|
94
|
+
# @param num_bits [Integer] Number of bits to decode
|
|
95
|
+
# @return [Integer] The decoded value
|
|
96
|
+
def decode_direct_bits(num_bits)
  result = 0
  num_bits.times do
    normalize
    # Each direct bit splits the range in half.
    @range >>= 1

    # The bit is 1 when the code lies in the upper half, which is the same
    # test decode_direct_bits_with_base uses. Previously the borrow flag of
    # (code - range) was appended as the bit, yielding the complement of
    # every decoded bit.
    if @code >= @range
      @code -= @range
      result = (result << 1) | 1
    else
      result <<= 1
    end
  end
  result
end
|
|
109
|
+
|
|
110
|
+
# Decode bits directly with a base value
|
|
111
|
+
#
|
|
112
|
+
# Used by distance decoder for slots 14+ where we need to
|
|
113
|
+
# build on a base value (2 or 3) iteratively.
|
|
114
|
+
#
|
|
115
|
+
# @param num_bits [Integer] Number of bits to decode
|
|
116
|
+
# @param base [Integer] Base value to start from
|
|
117
|
+
# @return [Integer] The decoded value
|
|
118
|
+
def decode_direct_bits_with_base(num_bits, base)
  # Fold the decoded bits, most significant first, onto the base value.
  num_bits.times.reduce(base) do |acc, _|
    normalize
    @range = (@range >> 1) & 0xFFFF_FFFF

    if @code >= @range
      # Upper half of the range: the bit is 1 and the code is rebased.
      @code = (@code - @range) & 0xFFFF_FFFF
      (acc << 1) + 1
    else
      # Lower half of the range: the bit is 0 and the code is unchanged.
      acc << 1
    end
  end
end
|
|
137
|
+
|
|
138
|
+
# Update the input stream (for LZMA2 multi-chunk streams)
|
|
139
|
+
#
|
|
140
|
+
# @param new_stream [IO] New input stream
|
|
141
|
+
# @return [void]
|
|
142
|
+
def update_stream(new_stream)
  # Only the byte source is swapped; @range and @code are left untouched.
  @stream = new_stream
end
|
|
145
|
+
|
|
146
|
+
# Reset the decoder state (for LZMA2 chunks)
|
|
147
|
+
#
|
|
148
|
+
# @return [void]
|
|
149
|
+
def reset
  @range = 0xFFFF_FFFF
  @code = 0

  # Shift five bytes through the 32-bit code register. The first byte falls
  # off the top, so only the last four remain in @code.
  5.times do
    @code = ((@code << 8) | read_byte) & 0xFFFF_FFFF
  end
end
|
|
155
|
+
|
|
156
|
+
private
|
|
157
|
+
|
|
158
|
+
# Initialize the decoder by reading the first 5 bytes
|
|
159
|
+
#
|
|
160
|
+
# @return [void]
|
|
161
|
+
def init_decoder
  # The first byte of the range-coded data must be 0.
  lead = read_byte
  raise "Invalid LZMA stream: first byte not 0" unless lead.zero?

  # The next four bytes, most significant first, form the initial code.
  @code = 0
  4.times do
    @code = ((@code << 8) | read_byte) & 0xFFFF_FFFF
  end
end
|
|
170
|
+
|
|
171
|
+
# Normalize the range when it becomes too small
|
|
172
|
+
#
|
|
173
|
+
# Ported from 7-Zip SDK NORMALIZE macro (LzmaDec.c line 22):
|
|
174
|
+
# #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
|
|
175
|
+
#
|
|
176
|
+
# @return [void]
|
|
177
|
+
def normalize
  # Rescale until the range is at least TOP again, pulling one input byte
  # into the code register for every 8-bit shift.
  until @range >= TOP
    @range = (@range << 8) & 0xFFFF_FFFF
    @code = ((@code << 8) | read_byte) & 0xFFFF_FFFF
  end
end
|
|
183
|
+
|
|
184
|
+
# Read a single byte from the input stream
|
|
185
|
+
#
|
|
186
|
+
# @return [Integer] The byte value (0-255)
|
|
187
|
+
def read_byte
  # getbyte yields nil at end of input; every real byte (0 included) is
  # truthy in Ruby, so only exhaustion reaches the raise.
  @stream.getbyte ||
    raise(Omnizip::DecompressionError, "LZMA compressed data exhausted prematurely")
end
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
end
|
|
200
|
+
end
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
# Ported from 7-Zip SDK C/LzmaEnc.c
|
|
24
|
+
# Direct port of the LZMA SDK range encoder for byte-for-byte compatibility
|
|
25
|
+
# with 7-Zip archives.
|
|
26
|
+
|
|
27
|
+
require_relative "../../../algorithms/lzma/range_coder"
|
|
28
|
+
require_relative "../../../algorithms/lzma/constants"
|
|
29
|
+
|
|
30
|
+
module Omnizip
|
|
31
|
+
module Implementations
|
|
32
|
+
module SevenZip
|
|
33
|
+
module LZMA
|
|
34
|
+
# Range encoder for 7-Zip SDK LZMA compression
|
|
35
|
+
#
|
|
36
|
+
# This is a direct port of 7-Zip SDK's range encoder implementation
|
|
37
|
+
# from LzmaEnc.c for guaranteed compatibility with 7-Zip archives.
|
|
38
|
+
#
|
|
39
|
+
# KEY DIFFERENCE from XZ Utils:
|
|
40
|
+
# - 7-Zip SDK normalizes AFTER encoding each bit
|
|
41
|
+
# - XZ Utils normalizes BEFORE encoding each bit
|
|
42
|
+
#
|
|
43
|
+
# This difference produces different output bytes, so we need
|
|
44
|
+
# separate implementations for 7-Zip and XZ Utils compatibility.
|
|
45
|
+
#
|
|
46
|
+
# Reference: 7-Zip SDK, C/LzmaEnc.c lines 730-784
|
|
47
|
+
class RangeEncoder
|
|
48
|
+
include Omnizip::Algorithms::LZMA::Constants
|
|
49
|
+
|
|
50
|
+
# Initialize the range encoder
|
|
51
|
+
#
|
|
52
|
+
# @param output_stream [IO] The output stream for encoded bytes
|
|
53
|
+
def initialize(output_stream)
  @stream = output_stream
  @low = 0
  @range = 0xFFFFFFFF # Full 32-bit range
  @cache = 0
  @cache_size = 1 # SDK initializes to 1
  # Left nil until #flush records a position. 0 is truthy in Ruby, so
  # starting at 0 made the `@pre_flush_pos || @stream.pos` fallback in
  # bytes_for_decode unreachable, and it reported 0 before any flush.
  @pre_flush_pos = nil
end
|
|
61
|
+
|
|
62
|
+
# Encode a single bit using a probability model
|
|
63
|
+
#
|
|
64
|
+
# Ported from 7-Zip SDK RC_BIT() macro (LzmaEnc.c lines 750-765)
|
|
65
|
+
# The key difference is that normalization happens AFTER encoding.
|
|
66
|
+
#
|
|
67
|
+
# SDK macro:
|
|
68
|
+
# #define RC_BIT(p, prob, bit) { \
|
|
69
|
+
# RC_BIT_PRE(p, prob) \
|
|
70
|
+
# if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \
|
|
71
|
+
# else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \
|
|
72
|
+
# *(prob) = (CLzmaProb)ttt; \
|
|
73
|
+
# RC_NORM(p) \
|
|
74
|
+
# }
|
|
75
|
+
#
|
|
76
|
+
# @param model [BitModel] The probability model for this bit
|
|
77
|
+
# @param bit [Integer] The bit value (0 or 1)
|
|
78
|
+
# @return [void]
|
|
79
|
+
def encode_bit(model, bit)
  prob = model.probability

  # RC_BIT_PRE: split point of the current range for this probability.
  bound = (@range >> 11) * prob

  if bit.zero?
    # Bit 0: keep the lower part and move the probability towards 0.
    @range = bound & 0xFFFF_FFFF
    prob += (BIT_MODEL_TOTAL - prob) >> MOVE_BITS
  else
    # Bit 1: step over the lower part and move the probability towards 1.
    @low = (@low + bound) & 0xFFFF_FFFF_FFFF_FFFF
    @range = (@range - bound) & 0xFFFF_FFFF
    prob -= prob >> MOVE_BITS
  end
  model.instance_variable_set(:@probability, prob)

  # RC_NORM: normalization happens after the bit has been encoded.
  normalize
end
|
|
102
|
+
|
|
103
|
+
# Encode bits directly without using probability model
|
|
104
|
+
#
|
|
105
|
+
# Used for encoding values with uniform distribution (e.g., distance
|
|
106
|
+
# high bits).
|
|
107
|
+
#
|
|
108
|
+
# @param value [Integer] The value to encode
|
|
109
|
+
# @param num_bits [Integer] Number of bits to encode
|
|
110
|
+
# @return [void]
|
|
111
|
+
def encode_direct_bits(value, num_bits)
  top = num_bits - 1
  num_bits.times do |step|
    # Halve the range; a 1 bit selects the upper half by advancing low.
    @range = (@range >> 1) & 0xFFFF_FFFF
    # Bits are emitted most significant first.
    @low = (@low + @range) & 0xFFFF_FFFF_FFFF_FFFF if value[top - step] == 1
    normalize
  end
end
|
|
120
|
+
|
|
121
|
+
# Flush remaining bytes to output stream
|
|
122
|
+
#
|
|
123
|
+
# Writes the final bytes to complete the range coding stream.
|
|
124
|
+
#
|
|
125
|
+
# @return [void]
|
|
126
|
+
def flush
  # Remember where the output stood before the final bytes are written.
  @pre_flush_pos = @stream.pos

  # Reset the range to its maximum (prevents further normalizations).
  @range = 0xFFFF_FFFF

  # Five shift_low calls drain the pending bytes (matches SDK behavior).
  5.times do
    shift_low
  end
end
|
|
136
|
+
|
|
137
|
+
# Return bytes needed for decoding
|
|
138
|
+
#
|
|
139
|
+
# @return [Integer] Number of bytes decoder will consume
|
|
140
|
+
def bytes_for_decode
  # Prefer the recorded pre-flush position whenever it is set (any
  # non-nil, non-false value, 0 included); otherwise report the live
  # stream position.
  return @pre_flush_pos if @pre_flush_pos

  @stream.pos
end
|
|
143
|
+
|
|
144
|
+
# Get current output position
|
|
145
|
+
#
|
|
146
|
+
# @return [Integer] Current position in output stream
|
|
147
|
+
def pos
  # Delegates to the output stream; bytes still held back in the encoder's
  # cache have not been written and are therefore not counted.
  @stream.pos
end
|
|
150
|
+
|
|
151
|
+
private
|
|
152
|
+
|
|
153
|
+
# Normalize the range when it becomes too small
|
|
154
|
+
#
|
|
155
|
+
# Ported from 7-Zip SDK RC_NORM macro (LzmaEnc.c line 730):
|
|
156
|
+
# #define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); }
|
|
157
|
+
#
|
|
158
|
+
# @return [void]
|
|
159
|
+
def normalize
  # Rescale until the range is at least TOP again; each 8-bit shift pushes
  # one byte of low towards the output via shift_low.
  until @range >= TOP
    @range = (@range << 8) & 0xFFFF_FFFF
    shift_low
  end
end
|
|
166
|
+
|
|
167
|
+
# Shift the top byte of 'low' to output
|
|
168
|
+
#
|
|
169
|
+
# Ported from 7-Zip SDK RangeEnc_ShiftLow().
|
|
170
|
+
# Handles carry propagation through the cache mechanism.
|
|
171
|
+
#
|
|
172
|
+
# Reference: 7-Zip SDK C/LzmaEnc.c RangeEnc_ShiftLow
|
|
173
|
+
#
|
|
174
|
+
# @return [void]
|
|
175
|
+
def shift_low
  low32 = @low & 0xFFFF_FFFF
  carry = (@low >> 32) & 0xFF

  # The cached byte can be emitted once it is certain whether a carry
  # reaches it: either a carry is present now, or the top byte of low is
  # not 0xFF and therefore cannot pass a later carry upwards.
  settled = carry != 0 || low32 < 0xFF00_0000

  if settled
    # Write the cached byte plus carry, then every pending 0xFF byte
    # (the carry ripples through those as well).
    loop do
      @stream.putc((@cache + carry) & 0xFF)
      @cache = 0xFF
      @cache_size -= 1
      break if @cache_size.zero?
    end

    # The top byte of low becomes the new cached byte.
    @cache = (low32 >> 24) & 0xFF
  end

  # One more byte is now pending; drop the top byte and carry from low.
  @cache_size += 1
  @low = (@low & 0x00FF_FFFF) << 8
end
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
end
|
|
197
|
+
end
|