omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../../../algorithms/lzma/constants"
|
|
24
|
+
require_relative "range_decoder" # Use 7-Zip SDK range decoder (not XZ Utils)
|
|
25
|
+
require_relative "../../../algorithms/lzma/bit_model"
|
|
26
|
+
require_relative "../../../algorithms/lzma/length_coder"
|
|
27
|
+
require_relative "../../../algorithms/lzma/distance_coder"
|
|
28
|
+
require_relative "state_machine"
|
|
29
|
+
|
|
30
|
+
module Omnizip
|
|
31
|
+
module Implementations
|
|
32
|
+
module SevenZip
|
|
33
|
+
module LZMA
|
|
34
|
+
# 7-Zip SDK compatible LZMA decoder.
|
|
35
|
+
#
|
|
36
|
+
# This decoder is designed to decode data encoded by the 7-Zip SDK encoder.
|
|
37
|
+
# Uses the same shared infrastructure (RangeDecoder, LengthCoder, DistanceCoder)
|
|
38
|
+
# to ensure model layout compatibility with the encoder.
|
|
39
|
+
class Decoder
|
|
40
|
+
include Omnizip::Algorithms::LZMA::Constants
|
|
41
|
+
|
|
42
|
+
attr_reader :lc, :lp, :pb, :dict_size, :uncompressed_size
|
|
43
|
+
|
|
44
|
+
# Initialize 7-Zip LZMA decoder
|
|
45
|
+
#
|
|
46
|
+
# @param input [IO] Input stream with LZMA compressed data
|
|
47
|
+
# @param options [Hash] Decoding options
|
|
48
|
+
# @option options [Boolean] :raw_mode Skip header parsing (for LZMA2)
|
|
49
|
+
# @option options [Integer] :lc Literal context bits
|
|
50
|
+
# @option options [Integer] :lp Literal position bits
|
|
51
|
+
# @option options [Integer] :pb Position bits
|
|
52
|
+
# @option options [Integer] :dict_size Dictionary size
|
|
53
|
+
# Build a decoder around +input+.
#
# In raw mode (options[:raw_mode] truthy) no header is read; the coding
# parameters come from +options+, falling back to lc=3, lp=0, pb=2 and a
# 64 KiB dictionary. Raw mode also reads options[:uncompressed_size],
# which may be nil. Otherwise the parameters are obtained by parse_header.
# In both cases init_decoder is called last.
#
# @param input [IO] Input stream with LZMA compressed data
# @param options [Hash] Decoding options
# @option options [Boolean] :raw_mode Skip header parsing
# @option options [Integer] :lc Literal context bits (raw mode only)
# @option options [Integer] :lp Literal position bits (raw mode only)
# @option options [Integer] :pb Position bits (raw mode only)
# @option options [Integer] :dict_size Dictionary size (raw mode only)
# @option options [Integer, nil] :uncompressed_size Expected output size
#   (raw mode only)
def initialize(input, options = {})
  @input = input
  @raw_mode = options.fetch(:raw_mode, false)

  if @raw_mode
    # `||` (not fetch) so that an explicit nil also selects the default.
    @lc = options[:lc] || 3
    @lp = options[:lp] || 0
    @pb = options[:pb] || 2
    @dict_size = options[:dict_size] || (1 << 16)
    @uncompressed_size = options[:uncompressed_size]
  else
    parse_header
  end

  init_decoder
end
|
|
69
|
+
|
|
70
|
+
# Decode LZMA stream
|
|
71
|
+
#
|
|
72
|
+
# @param output [IO, nil] Output stream (if nil, returns String)
|
|
73
|
+
# @param preserve_dict [Boolean] Preserve dictionary for chunked decoding
|
|
74
|
+
# @return [String, Integer] Decompressed data or bytes written
|
|
75
|
+
# Decode symbols from the current input until reached_end? is true or the
# @allow_eopm flag flips from truthy to falsy during a decode_symbol call.
#
# The output buffer is always restarted. The dictionary, its write cursor
# (@dict_pos) and its fill flag (@dict_full) are restarted together, and
# only when the dictionary is not being preserved: keeping the array while
# rewinding the cursor would leave the retained history misaligned with
# the position new bytes are written at. This matches what #reset does.
#
# @param output [IO, nil] Output stream (if nil, the data is returned)
# @param preserve_dict [Boolean] Keep an existing dictionary and cursor
# @return [String, Integer] Binary string of decoded bytes when +output+
#   is nil, otherwise the number of bytes written to +output+
def decode_stream(output = nil, preserve_dict: false)
  @output_buffer = []

  if preserve_dict && @dictionary
    # History is kept; only fill in cursor state that was never set.
    @dict_pos ||= 0
    @dict_full = false if @dict_full.nil?
  else
    @dictionary = Array.new(@dict_size, 0)
    @dict_pos = 0
    @dict_full = false
  end

  # Initialize range decoder (7-Zip SDK version)
  @range_decoder = RangeDecoder.new(@input)

  loop do
    break if reached_end?

    # Remember the flag so a change made by decode_symbol can be detected.
    eopm_before = @allow_eopm

    decode_symbol

    # Flag was on and is now off: the end-of-stream marker was consumed.
    break if eopm_before && !@allow_eopm
  end

  result = @output_buffer.pack("C*").force_encoding(Encoding::BINARY)
  return result unless output

  output.write(result)
  result.bytesize
end
|
|
106
|
+
|
|
107
|
+
# Reset decoder state for new chunk (LZMA2)
|
|
108
|
+
# Re-initialise decoder state for a new chunk (LZMA2).
#
# Any of lc / lp / pb that is passed (non-nil) replaces the current value;
# the others are left alone. Unless preserve_dict is true the dictionary is
# reallocated zero-filled and its write position and "full" flag are
# cleared. Finally init_decoder rebuilds the decoding state.
#
# @param new_lc [Integer, nil] Replacement literal context bits
# @param new_lp [Integer, nil] Replacement literal position bits
# @param new_pb [Integer, nil] Replacement position bits
# @param preserve_dict [Boolean] Keep the current dictionary state
# @return whatever init_decoder returns
def reset(new_lc: nil, new_lp: nil, new_pb: nil, preserve_dict: false)
  @lc = new_lc || @lc
  @lp = new_lp || @lp
  @pb = new_pb || @pb

  if !preserve_dict
    @dictionary = Array.new(@dict_size, 0)
    @dict_pos = 0
    @dict_full = false
  end

  init_decoder
end
|
|
121
|
+
|
|
122
|
+
# Set new input stream (LZMA2)
|
|
123
|
+
# Replace the stream that decoding reads from (LZMA2 chunk switching).
# The next decode_stream call builds its RangeDecoder over this stream.
#
# @param new_input [IO] Stream holding the next piece of compressed data
# @return [IO] the stream that was just installed
def set_input(new_input)
  @input = new_input
end
|
|
126
|
+
|
|
127
|
+
# Set uncompressed size (LZMA2)
|
|
128
|
+
# Set the expected output size for the next decode (LZMA2).
#
# @param size [Integer] Number of bytes reached_end? compares the output
#   count against when no end marker is allowed
# @param allow_eopm [Boolean] Whether decoding should run until an
#   end-of-payload marker instead of stopping at +size+
# @return [Boolean] the +allow_eopm+ value that was stored
def set_uncompressed_size(size, allow_eopm: true)
  @uncompressed_size = size
  @allow_eopm = allow_eopm
end
|
|
132
|
+
|
|
133
|
+
private
|
|
134
|
+
|
|
135
|
+
# Parse LZMA header
|
|
136
|
+
# Read the 13-byte LZMA header from @input, one byte at a time.
#
# Layout as consumed here:
#   byte 0      properties, encoded as (pb * 5 + lp) * 9 + lc
#   bytes 1-4   dictionary size, little-endian (0 is bumped to 1)
#   bytes 5-12  uncompressed size, little-endian; all-ones means the
#               size is unknown and an end-of-payload marker is expected
#
# Sets @lc, @lp, @pb, @dict_size, @uncompressed_size and @allow_eopm.
#
# @raise [RuntimeError] if the stream ends inside the header or pb > 4
# @return [Boolean] the value stored in @allow_eopm
def parse_header
  props = @input.getbyte
  raise "Invalid LZMA header: missing properties" unless props

  pb_and_lp, @lc = props.divmod(9)
  @pb, @lp = pb_and_lp.divmod(5)

  raise "Invalid LZMA properties: pb=#{@pb} > 4" if @pb > 4

  @dict_size = 0
  [0, 8, 16, 24].each do |shift|
    octet = @input.getbyte
    raise "Invalid LZMA header: missing dictionary size" unless octet

    @dict_size |= octet << shift
  end
  @dict_size = 1 if @dict_size < 1

  @uncompressed_size = 0
  (0...64).step(8) do |shift|
    octet = @input.getbyte
    raise "Invalid LZMA header: missing uncompressed size" unless octet

    @uncompressed_size |= octet << shift
  end

  @allow_eopm = (@uncompressed_size == 0xFFFFFFFFFFFFFFFF)
end
|
|
167
|
+
|
|
168
|
+
# Initialize decoder state
|
|
169
|
+
# Build a fresh decoding state from the current @lc / @lp / @pb.
#
# Creates a new StateMachine, the four repeat distances, the literal mask,
# every probability-model array, the two length coders and the distance
# coder, and zeroes the output counter.
#
# Repeat distances start at 1, not 0: this decoder stores distances 1-based
# (decode_match adds 1 to the decoded value before storing it, and
# get_byte_from_dict treats distance 1 as the most recent byte). The LZMA
# format's initial rep value of 0 is 0-based, i.e. "one byte back", which is
# 1 in this representation. Starting at 0 made a repeat match that occurs
# before any normal match read the slot about to be written instead of the
# previous byte.
#
# @return [Integer] 0 (the reset output counter)
def init_decoder
  @state = StateMachine.new
  @reps = [1, 1, 1, 1]

  # Calculate literal_mask using XZ Utils formula
  # literal_mask = (UINT32_C(0x100) << (lp)) - (UINT32_C(0x100) >> (lc))
  @literal_mask = (0x100 << @lp) - (0x100 >> @lc)

  # Each call yields an array of +count+ independent BitModel instances.
  fresh_models = lambda do |count|
    Array.new(count) { Omnizip::Algorithms::LZMA::BitModel.new }
  end

  # Literal models: the largest context is the mask itself, giving a largest
  # base offset of 3 * (mask << lc); 0x300 more covers the per-context
  # sub-indices used by decode_literal, and +1 turns that top index into a
  # length.
  highest_literal_index = (3 * (@literal_mask << @lc)) + 0x300
  @literal_models = fresh_models.call(highest_literal_index + 1)

  # Models indexed by state alone, or by (state, pos_state).
  num_pos_states = 1 << @pb
  @is_match_models = fresh_models.call(NUM_STATES * num_pos_states)
  @is_rep_models = fresh_models.call(NUM_STATES)
  @is_rep0_models = fresh_models.call(NUM_STATES)
  @is_rep1_models = fresh_models.call(NUM_STATES)
  @is_rep2_models = fresh_models.call(NUM_STATES)
  @is_rep0_long_models = fresh_models.call(NUM_STATES * num_pos_states)

  # Use shared LengthCoder and DistanceCoder (they have decode methods)
  @length_coder = Omnizip::Algorithms::LZMA::LengthCoder.new(num_pos_states)
  @rep_length_coder = Omnizip::Algorithms::LZMA::LengthCoder.new(num_pos_states)
  @distance_coder = Omnizip::Algorithms::LZMA::DistanceCoder.new(NUM_LEN_TO_POS_STATES)

  @output_count = 0
end
|
|
217
|
+
|
|
218
|
+
# Check if we've reached end of stream
|
|
219
|
+
# Whether the main loop should stop because enough output was produced.
#
# While an end-of-payload marker is allowed this is always false (the loop
# ends on the marker instead). Otherwise it is true once the number of
# decoded bytes has reached @uncompressed_size.
#
# @return [Boolean]
def reached_end?
  return false if @allow_eopm

  @output_count >= @uncompressed_size
end
|
|
226
|
+
|
|
227
|
+
# Decode one symbol
|
|
228
|
+
# Decode one symbol: read the is-match bit and dispatch on it.
#
# pos_state is the low @pb bits of the output counter. The is-match model is
# selected by (state value, pos_state). A 0 bit means a literal follows,
# anything else means a match.
#
# @return the result of decode_literal or decode_match
def decode_symbol
  pos_state_count = 1 << @pb
  pos_state = @output_count & (pos_state_count - 1)

  # Decode is_match using same model layout as encoder
  match_model = @is_match_models[(@state.value * pos_state_count) + pos_state]
  return decode_literal if @range_decoder.decode_bit(match_model).zero?

  decode_match(pos_state)
end
|
|
241
|
+
|
|
242
|
+
# Decode a literal byte
|
|
243
|
+
# Decode one literal byte and append it to the dictionary and output.
#
# The probability sub-table is chosen from the output position and the
# previously written byte (XZ Utils formula):
#   context     = ((pos << 8) + prev_byte) & literal_mask
#   base_offset = 3 * (context << lc)
#
# The previous byte is read from the circular dictionary. It is 0 only when
# nothing has been written yet. Once the buffer has wrapped (@dict_full)
# and the write position is back at 0, the previous byte is the last slot
# of the buffer — it used to be treated as 0 in that case, which selected
# the wrong context at every multiple of the dictionary size.
#
# Two bit-decoding modes, chosen by the state machine:
# - matched literal: each bit's model also depends on the corresponding bit
#   of the byte found at distance reps[0]; once a decoded bit disagrees with
#   the match bit, +offset+ becomes 0 and the remaining bits decode like a
#   normal literal.
# - normal literal: plain 8-level bit tree.
#
# @return whatever @state.update_literal returns
def decode_literal
  prev_byte =
    if @dict_pos.positive? || @dict_full
      @dictionary[(@dict_pos - 1) % @dict_size]
    else
      0
    end

  context = ((@output_count << 8) + prev_byte) & @literal_mask
  base_offset = 3 * (context << @lc)

  # Symbol starts at 1 so that after 8 decoded bits its low byte is the
  # literal and bit 8 is the leading marker.
  symbol = 1

  if @state.use_matched_literal?
    match_byte = get_byte_from_dict(@reps[0])
    offset = 0x100

    8.times do
      # Shift first, then pick the bit now sitting at the 0x100 position
      # (matches the XZ Utils rc_matched_literal ordering).
      match_byte <<= 1
      match_bit = match_byte & offset

      # Sub-coder index: offset + match_bit + symbol (symbol not shifted).
      model = @literal_models[base_offset + offset + match_bit + symbol]
      bit = @range_decoder.decode_bit(model)

      if bit.zero?
        symbol <<= 1
        offset &= ~match_bit
      else
        symbol = (symbol << 1) + 1
        offset &= match_bit
      end
    end
  else
    8.times do
      bit = @range_decoder.decode_bit(@literal_models[base_offset + symbol])
      symbol = (symbol << 1) | bit
    end
  end

  byte = symbol & 0xFF

  put_byte_to_dict(byte)
  @output_buffer << byte
  @output_count += 1
  @state.update_literal
end
|
|
305
|
+
|
|
306
|
+
# Decode a match
|
|
307
|
+
# Decode a match (normal or repeated) and copy its bytes to the output.
#
# Normal match: decode the length, then the distance slot-coded with
# len_state = min(len - MATCH_LEN_MIN, NUM_LEN_TO_POS_STATES - 1). A decoded
# value of 0xFFFFFFFF is the end-of-payload marker: @allow_eopm is cleared
# and nothing is copied. Otherwise the decoded value is turned into a
# 1-based distance and pushed to the front of the repeat-distance list.
#
# Repeated match: length and distance come from decode_rep_match.
#
# Distance check: once the circular buffer is full it holds exactly
# @dict_size bytes of history, so 1-based distances 1..@dict_size are all
# valid; only larger ones are rejected. (The old `>=` comparison refused the
# oldest byte still in the buffer.)
#
# @param pos_state [Integer] Low position bits selecting the length models
# @raise [RuntimeError] if the distance exceeds a full dictionary
# @return [Integer, nil] the copied length, or nil when the end marker was hit
def decode_match(pos_state)
  if @range_decoder.decode_bit(@is_rep_models[@state.value]).zero?
    # Simple match
    len = @length_coder.decode(@range_decoder, pos_state) + MATCH_LEN_MIN
    @state.update_match

    len_state = [len - MATCH_LEN_MIN, NUM_LEN_TO_POS_STATES - 1].min
    distance = @distance_coder.decode(@range_decoder, len_state)

    # Check for EOPM (distance = 0xFFFFFFFF means end marker)
    if distance == 0xFFFFFFFF
      @allow_eopm = false
      return
    end

    # Decoder returns distance before +1, so add 1 to get actual distance
    distance += 1

    raise "Invalid distance: #{distance}" if @dict_full && distance > @dict_size

    # Newest distance goes first; the oldest repeat distance falls off.
    @reps.pop
    @reps.unshift(distance)
  else
    # Repeated match
    len, distance = decode_rep_match(pos_state)
  end

  # Copy byte by byte so overlapping matches (distance < len) see the bytes
  # this same copy has just written.
  len.times do
    byte = get_byte_from_dict(distance)
    put_byte_to_dict(byte)
    @output_buffer << byte
    @output_count += 1
  end
end
|
|
348
|
+
|
|
349
|
+
# Decode repeated match
|
|
350
|
+
# Decode a repeated match: one that reuses a recently used distance.
#
# A chain of bits selects which of the four remembered distances is used:
#   is_rep0 = 0, is_rep0_long = 0  -> "short rep": length 1 at reps[0]
#   is_rep0 = 0, is_rep0_long = 1  -> reps[0]
#   is_rep0 = 1, is_rep1 = 0       -> reps[1]
#   is_rep0 = 1, is_rep1 = 1, is_rep2 = 0 -> reps[2]
#   otherwise                      -> reps[3]
# Except for the short rep, the length is then read from the rep length
# coder, the chosen distance is moved to the front of the list (the ones
# ahead of it shift back by one), and the state machine records a rep.
#
# @param pos_state [Integer] Low position bits selecting the models
# @return [Array(Integer, Integer)] [length, distance]
def decode_rep_match(pos_state)
  state_index = @state.value

  if @range_decoder.decode_bit(@is_rep0_models[state_index]).zero?
    long_model = @is_rep0_long_models[(state_index * (1 << @pb)) + pos_state]
    if @range_decoder.decode_bit(long_model).zero?
      # Short rep (length 1)
      @state.update_short_rep
      return [1, @reps[0]]
    end

    rep_index = 0
  elsif @range_decoder.decode_bit(@is_rep1_models[state_index]).zero?
    rep_index = 1
  elsif @range_decoder.decode_bit(@is_rep2_models[state_index]).zero?
    rep_index = 2
  else
    rep_index = 3
  end

  len = @rep_length_coder.decode(@range_decoder, pos_state) + MATCH_LEN_MIN

  # Move-to-front on the same array object.
  distance = @reps.delete_at(rep_index)
  @reps.unshift(distance)

  @state.update_rep
  [len, distance]
end
|
|
397
|
+
|
|
398
|
+
# Get byte from dictionary at distance
|
|
399
|
+
# Distance 1 = most recent byte = position dict_pos - 1
|
|
400
|
+
# Distance N = position dict_pos - N
|
|
401
|
+
# Look up the byte +distance+ positions behind the write cursor.
#
# Distance 1 is the most recently written byte (slot dict_pos - 1),
# distance N is slot dict_pos - N, wrapping around the circular buffer.
# While the buffer has not wrapped yet, a distance reaching back before the
# first written byte yields 0 instead of raising.
#
# @param distance [Integer] 1-based distance back from the write cursor
# @return [Integer] the byte stored at that position, or 0
def get_byte_from_dict(distance)
  return 0 if !@dict_full && distance > @dict_pos

  # Ruby's % is non-negative for a positive modulus, so this wraps correctly
  # when distance exceeds the cursor.
  @dictionary[(@dict_pos - distance) % @dict_size]
end
|
|
410
|
+
|
|
411
|
+
# Put byte to dictionary
|
|
412
|
+
# Store +byte+ at the write cursor and advance the cursor by one slot,
# wrapping to 0 at @dict_size. The first time (and every time) the cursor
# wraps back to 0 the dictionary is flagged as full.
#
# @param byte [Integer] Byte value to store
# @return [true, nil] true when the cursor wrapped on this write, else nil
def put_byte_to_dict(byte)
  @dictionary[@dict_pos] = byte
  @dict_pos = @dict_pos.succ % @dict_size
  @dict_full = true if @dict_pos.zero?
end
|
|
417
|
+
end
|
|
418
|
+
end
|
|
419
|
+
end
|
|
420
|
+
end
|
|
421
|
+
end
|