omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,645 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "xz_match_finder_adapter"
|
|
24
|
+
require_relative "xz_state"
|
|
25
|
+
require_relative "xz_probability_models"
|
|
26
|
+
require_relative "xz_buffered_range_encoder"
|
|
27
|
+
require_relative "constants"
|
|
28
|
+
|
|
29
|
+
module Omnizip
|
|
30
|
+
module Algorithms
|
|
31
|
+
class LZMA < Algorithm
|
|
32
|
+
# XZ Utils-compatible fast mode encoder
|
|
33
|
+
#
|
|
34
|
+
# Implements greedy heuristics from lzma_encoder_optimum_fast.c.
|
|
35
|
+
# Uses 1-position lookahead to decide between literals and matches.
|
|
36
|
+
# No price calculation - relies on simple heuristics for speed.
|
|
37
|
+
#
|
|
38
|
+
# Based on: xz/src/liblzma/lzma/lzma_encoder_optimum_fast.c
|
|
39
|
+
class XzEncoderFast
|
|
40
|
+
include Constants
|
|
41
|
+
|
|
42
|
+
# Number of rep distances (REPS constant)
|
|
43
|
+
REPS = 4
|
|
44
|
+
|
|
45
|
+
# Literal marker (matches XZ Utils UINT32_MAX)
|
|
46
|
+
LITERAL_MARKER = 0xFFFFFFFF
|
|
47
|
+
|
|
48
|
+
attr_reader :reps
|
|
49
|
+
|
|
50
|
+
# Return bytes needed for decoding (excludes flush padding)
|
|
51
|
+
#
|
|
52
|
+
# For LZMA2: returns pre-flush position (excludes 5-byte flush padding)
|
|
53
|
+
# For regular LZMA: returns full output size
|
|
54
|
+
#
|
|
55
|
+
# @return [Integer] Number of bytes decoder will consume
|
|
56
|
+
def bytes_for_decode
  # Pure delegation: the byte count is whatever the wrapped range encoder
  # reports; this class performs no adjustment of its own.
  range_encoder = @encoder
  range_encoder.bytes_for_decode
end
|
|
59
|
+
|
|
60
|
+
# Initialize fast mode encoder
|
|
61
|
+
#
|
|
62
|
+
# @param mf [XzMatchFinderAdapter] Match finder
|
|
63
|
+
# @param encoder [XzBufferedRangeEncoder] Range encoder
|
|
64
|
+
# @param models [XzProbabilityModels] Probability models
|
|
65
|
+
# @param state [XzState] LZMA state machine
|
|
66
|
+
# @param nice_len [Integer] Nice match length (default 32)
|
|
67
|
+
# @param lc [Integer] Literal context bits (default 3)
|
|
68
|
+
# @param lp [Integer] Literal position bits (default 0)
|
|
69
|
+
# @param pb [Integer] Position bits (default 2)
|
|
70
|
+
def initialize(mf, encoder, models, state, nice_len: 32, lc: 3, lp: 0,
               pb: 2)
  # Collaborators handed in by the caller: match finder, range encoder,
  # probability models and state machine.
  @mf, @encoder, @models, @state = mf, encoder, models, state

  # Tuning parameters (nice match length plus the lc/lp/pb settings).
  @nice_len = nice_len
  @lc, @lp, @pb = lc, lp, pb

  # The four most recent match distances. They start at zero so that no
  # rep match is reported before the first normal match has been seen.
  @reps = Array.new(4, 0)

  # One-position lookahead cache; nothing has been read ahead yet.
  @read_ahead = 0
  @longest_match_length = 0
  @matches_count = 0
  @cached_matches = []

  # Previous byte, kept for literal context.
  @prev_byte = 0
end
|
|
94
|
+
|
|
95
|
+
# Find best match at current position using fast mode heuristics
|
|
96
|
+
#
|
|
97
|
+
# Returns (back, len) where:
|
|
98
|
+
# - back = LITERAL_MARKER, len = 1: encode literal
|
|
99
|
+
# - back = 0..3, len >= 2: rep match (use reps[back])
|
|
100
|
+
# - back >= 4, len >= 2: normal match (distance = back - 4)
|
|
101
|
+
#
|
|
102
|
+
# @return [Array<Integer, Integer>] [back, len]
|
|
103
|
+
def find_best_match
  # Either query the match finder for the current position, or reuse the
  # length/count cached by the lookahead a previous call performed.
  if @read_ahead.zero?
    len_main = @mf.find_matches
    matches_count = @mf.matches.size
  else
    len_main = @longest_match_length
    matches_count = @matches_count
    @read_ahead = 0
  end

  buf_avail = [@mf.available + 1, MATCH_LEN_MAX].min

  # Fewer than two bytes left: only a literal is possible.
  return [LITERAL_MARKER, 1] if buf_avail < 2

  rep_len, rep_index = check_rep_matches(buf_avail)

  # A rep match of at least nice_len is taken immediately. No skipping is
  # done here; the caller advances the match finder.
  return [rep_index, rep_len] if rep_len >= @nice_len

  # Likewise for a normal match of at least nice_len. The match finder's
  # distance is converted with "- 1 + REPS" to form the back value.
  return [@mf.matches.last.dist - 1 + REPS, len_main] if len_main >= @nice_len

  back_main = 0
  if len_main >= 2
    back_main = @mf.matches.last.dist

    # change_pair heuristic: while the next-shorter candidate is only one
    # byte shorter but much closer, step down to it.
    while matches_count > 1 &&
          len_main == @mf.matches[matches_count - 2].len + 1
      prev_dist = @mf.matches[matches_count - 2].dist
      break unless change_pair?(prev_dist, back_main)

      matches_count -= 1
      len_main = @mf.matches[matches_count - 1].len
      back_main = @mf.matches[matches_count - 1].dist
    end

    # A two-byte match at a far distance is not worth encoding.
    len_main = 1 if len_main == 2 && back_main >= 0x80
  end

  # Prefer the rep match when it is nearly as long as the normal match;
  # the allowed shortfall grows with the normal match's distance.
  if (rep_len >= 2) && ((rep_len + 1 >= len_main) ||
     (rep_len + 2 >= len_main && back_main > (1 << 9)) ||
     (rep_len + 3 >= len_main && back_main > (1 << 15)))
    return [rep_index, rep_len]
  end

  return [LITERAL_MARKER, 1] if len_main < 2 || buf_avail <= 2

  # Lookahead: fetch matches for the next position and cache the result so
  # the next call can reuse it instead of querying the match finder again.
  @longest_match_length = @mf.find_matches
  @matches_count = @mf.matches.size
  @read_ahead = 1

  if @longest_match_length >= 2
    new_dist = @mf.matches.last.dist

    # Emit a literal now if the next position offers a better match.
    if (@longest_match_length >= len_main && new_dist < back_main) ||
       (@longest_match_length == len_main + 1 &&
         !change_pair?(back_main, new_dist)) ||
       (@longest_match_length > len_main + 1) ||
       (len_main.between?(3, @longest_match_length + 1) &&
         change_pair?(new_dist, back_main))
      return [LITERAL_MARKER, 1]
    end
  end

  # Emit a literal if a rep distance matches at the next position.
  # Zero entries are placeholders for rep slots that have not been filled
  # yet (check_rep_matches skips them the same way). They must be skipped
  # individually: comparing at distance 0 compares bytes with themselves
  # and always succeeds, which would force a literal whenever the rep
  # history is only partly filled.
  limit = [2, len_main - 1].max
  @reps.each do |rep_dist|
    next if rep_dist.zero?
    return [LITERAL_MARKER, 1] if memcmp_at_offset(1, rep_dist, limit)
  end

  # Encode the chosen normal match; the caller performs the skipping.
  # back_main is the raw match-finder distance, converted to a back value
  # the same way as in the nice_len shortcut above.
  [back_main - 1 + REPS, len_main]
end
|
|
207
|
+
|
|
208
|
+
# Update rep distances after encoding match
|
|
209
|
+
#
|
|
210
|
+
# @param distance [Integer] Match distance (0-based)
|
|
211
|
+
def update_reps_match(distance)
  # The new distance becomes rep0; the previous rep0..rep2 each move down
  # one slot and the old rep3 is dropped.
  @reps = [distance, *@reps.values_at(0, 1, 2)]
end
|
|
214
|
+
|
|
215
|
+
# Update rep distances after encoding rep match
|
|
216
|
+
#
|
|
217
|
+
# @param rep_index [Integer] Rep index (0-3)
|
|
218
|
+
def update_reps_rep(rep_index)
  # Move the selected rep distance to the front, keeping the relative order
  # of the remaining entries. The array is modified in place.
  @reps.unshift(@reps.delete_at(rep_index))
end
|
|
223
|
+
|
|
224
|
+
# Encode literal symbol
|
|
225
|
+
#
|
|
226
|
+
# @param symbol [Integer] Byte value to encode
|
|
227
|
+
def encode_literal(symbol)
  position = @mf.pos
  pos_state = position & ((1 << @pb) - 1)

  # A 0 on the is_match model marks this packet as a literal.
  @encoder.queue_bit(@models.is_match[@state.value][pos_state], 0)

  # Base index of the literal subcoder selected by position and prev byte.
  literal_base = get_literal_base(position, @prev_byte)

  if @state.literal_state?
    # Plain 8-bit tree.
    encode_normal_literal(literal_base, symbol)
  else
    # Code against the byte found at the rep0 distance.
    rep0_byte = @mf.get_byte(-@reps[0])
    encode_matched_literal(literal_base, rep0_byte, symbol)
  end

  @state.update_literal
  @prev_byte = symbol
end
|
|
251
|
+
|
|
252
|
+
# Encode rep match
|
|
253
|
+
#
|
|
254
|
+
# @param rep_index [Integer] Rep index (0-3)
|
|
255
|
+
# @param length [Integer] Match length (>= 2)
|
|
256
|
+
def encode_rep_match(rep_index, length)
  pos_state = @mf.pos & ((1 << @pb) - 1)
  state_index = @state.value

  # is_match = 1, is_rep = 1 selects the rep-match family.
  @encoder.queue_bit(@models.is_match[state_index][pos_state], 1)
  @encoder.queue_bit(@models.is_rep[state_index], 1)

  rep0_prob = @models.is_rep0[state_index]

  if rep_index == 0
    # A 0 on is_rep0 selects rep0; is_rep0_long then separates the
    # one-byte short rep (0) from a long rep0 match (1).
    @encoder.queue_bit(rep0_prob, 0)
    long_prob = @models.is_rep0_long[state_index][pos_state]

    if length == 1
      @encoder.queue_bit(long_prob, 0)
      @state.update_short_rep
    else
      @encoder.queue_bit(long_prob, 1)
      encode_rep_length(length, pos_state)
      @state.update_long_rep
    end
  else
    # rep1..rep3: is_rep0 = 1, then a 0 on the first matching selector
    # (is_rep1 for rep1, is_rep2 for rep2); rep3 sends 1 on both.
    @encoder.queue_bit(rep0_prob, 1)

    if rep_index == 1
      @encoder.queue_bit(@models.is_rep1[state_index], 0)
    else
      @encoder.queue_bit(@models.is_rep1[state_index], 1)
      @encoder.queue_bit(@models.is_rep2[state_index], rep_index == 2 ? 0 : 1)
    end

    encode_rep_length(length, pos_state)
    @state.update_long_rep
  end

  # Remember the last byte covered by the match.
  @prev_byte = @mf.get_byte(length - 1)
end
|
|
314
|
+
|
|
315
|
+
# Encode normal match
|
|
316
|
+
#
|
|
317
|
+
# @param distance [Integer] Match distance (0-based)
|
|
318
|
+
# @param length [Integer] Match length (>= 2)
|
|
319
|
+
def encode_normal_match(distance, length)
  pos_state = @mf.pos & ((1 << @pb) - 1)
  state_index = @state.value

  # is_match = 1 followed by is_rep = 0 announces a normal match.
  @encoder.queue_bit(@models.is_match[state_index][pos_state], 1)
  @encoder.queue_bit(@models.is_rep[state_index], 0)

  # Payload: length first, then distance.
  encode_match_length(length, pos_state)
  encode_distance(distance, length)

  @state.update_match

  # Remember the last byte covered by the match.
  @prev_byte = @mf.get_byte(length - 1)
end
|
|
340
|
+
|
|
341
|
+
private
|
|
342
|
+
|
|
343
|
+
# Check all rep matches at current position
|
|
344
|
+
#
|
|
345
|
+
# @param buf_avail [Integer] Bytes available
|
|
346
|
+
# @return [Array<Integer, Integer>] [best_rep_len, best_rep_index]
|
|
347
|
+
def check_rep_matches(buf_avail)
  # Fold over the rep distances keeping [best_len, best_index]. On equal
  # lengths the earlier index wins, and [0, 0] is the answer when nothing
  # qualifies (including when every distance is still 0).
  @reps.each_with_index.reduce([0, 0]) do |best, (rep_dist, index)|
    # A distance of 0 is treated as an unset slot.
    next best if rep_dist.zero?

    # The first MATCH_LEN_MIN bytes must agree before measuring further.
    next best unless matches_at_distance?(rep_dist, MATCH_LEN_MIN)

    candidate = calculate_match_length(rep_dist, buf_avail)
    candidate > best.first ? [candidate, index] : best
  end
end
|
|
373
|
+
|
|
374
|
+
# Check if first n bytes match at given distance
|
|
375
|
+
#
|
|
376
|
+
# @param distance [Integer] Distance to check (0-based: 0=same pos, 1=1 byte back)
|
|
377
|
+
# @param n [Integer] Number of bytes to check
|
|
378
|
+
# @return [Boolean] True if matches
|
|
379
|
+
def matches_at_distance?(distance, n)
  # Reject distances larger than the match finder's current position.
  return false if @mf.pos < distance

  # Every one of the first n bytes must equal the byte `distance` earlier.
  (0...n).all? { |i| @mf.get_byte(i) == @mf.get_byte(i - distance) }
end
|
|
390
|
+
|
|
391
|
+
# Calculate match length at given distance
|
|
392
|
+
#
|
|
393
|
+
# @param distance [Integer] Distance (0-based: 0=same pos, 1=1 byte back)
|
|
394
|
+
# @param max_len [Integer] Maximum length to check
|
|
395
|
+
# @return [Integer] Match length
|
|
396
|
+
def calculate_match_length(distance, max_len)
  # Distances larger than the current position yield no match.
  return 0 if @mf.pos < distance

  # Count leading bytes that equal the byte `distance` earlier, stopping at
  # the first difference or at max_len.
  matched = 0
  matched += 1 while matched < max_len &&
                     @mf.get_byte(matched) == @mf.get_byte(matched - distance)
  matched
end
|
|
411
|
+
|
|
412
|
+
# Compare bytes at offset with bytes at distance
|
|
413
|
+
#
|
|
414
|
+
# Used for checking reps after lookahead.
|
|
415
|
+
#
|
|
416
|
+
# @param offset [Integer] Offset from current position
|
|
417
|
+
# @param distance [Integer] Distance to check (1-based)
|
|
418
|
+
# @param limit [Integer] Number of bytes to compare
|
|
419
|
+
# @return [Boolean] True if all bytes match
|
|
420
|
+
def memcmp_at_offset(offset, distance, limit)
  # True when each of the `limit` bytes starting at `offset` equals the
  # byte `distance` positions before it. No bounds checking is done here.
  (0...limit).all? do |i|
    here = offset + i
    @mf.get_byte(here) == @mf.get_byte(here - distance)
  end
end
|
|
429
|
+
|
|
430
|
+
# Apply change_pair heuristic
|
|
431
|
+
#
|
|
432
|
+
# Prefer closer distances if far distance is much larger.
|
|
433
|
+
#
|
|
434
|
+
# @param small_dist [Integer] Smaller distance
|
|
435
|
+
# @param big_dist [Integer] Larger distance
|
|
436
|
+
# @return [Boolean] True if should change to smaller distance
|
|
437
|
+
def change_pair?(small_dist, big_dist)
  # True when small_dist is below big_dist / 128 (big_dist shifted right
  # by 7 bits), i.e. the big distance is far larger than the small one.
  small_dist < (big_dist >> 7)
end
|
|
440
|
+
|
|
441
|
+
# Get literal subcoder BASE index
|
|
442
|
+
#
|
|
443
|
+
# Ported from XZ Utils literal_subcoder() macro in lzma_common.h
|
|
444
|
+
# Returns the base index into the flat literal models array
|
|
445
|
+
# Each subcoder has 768 probability models (0x300)
|
|
446
|
+
#
|
|
447
|
+
# @param pos [Integer] Current position
|
|
448
|
+
# @param prev_byte [Integer] Previous byte
|
|
449
|
+
# @return [Integer] Base index into @models.literal array
|
|
450
|
+
def get_literal_base(pos, prev_byte)
  # Mask keeping the low @lp bits of pos (placed above bit 8) and the top
  # @lc bits of prev_byte.
  mask = (0x100 << @lp) - (0x100 >> @lc)

  # Combine pos and prev_byte, keep the masked bits, then scale by 2**lc.
  selector = ((pos << 8) + prev_byte) & mask

  # Multiplying by 3 gives the base index into the flat literal model array.
  (selector << @lc) * 3
end
|
|
460
|
+
|
|
461
|
+
# Encode normal literal (8-bit tree)
|
|
462
|
+
#
|
|
463
|
+
# @param literal_base [Integer] Base index into literal models array
|
|
464
|
+
# @param symbol [Integer] Byte value
|
|
465
|
+
def encode_normal_literal(literal_base, symbol)
  # Walk the 8-bit tree from the most significant bit down. `node` starts
  # at 1 and accumulates the bits already sent, selecting the model used
  # for the next bit.
  node = 1
  8.downto(1) do |width|
    bit = symbol[width - 1]
    @encoder.queue_bit(@models.literal[literal_base + node], bit)
    node = (node << 1) | bit
  end
end
|
|
473
|
+
|
|
474
|
+
# Encode matched literal (compare with match byte)
|
|
475
|
+
#
|
|
476
|
+
# @param literal_base [Integer] Base index into literal models array
|
|
477
|
+
# @param match_byte [Integer] Byte at match position
|
|
478
|
+
# @param symbol [Integer] Byte value to encode
|
|
479
|
+
def encode_matched_literal(literal_base, match_byte, symbol)
  # `mask` stays 0x100 while the bits sent so far agree with the match
  # byte; it drops to 0 after the first disagreement, after which the match
  # byte no longer influences model selection.
  mask = 0x100
  shifted_match = match_byte
  code = symbol + 0x100 # the extra bit above the byte ends the loop

  until code >= 0x10000
    shifted_match <<= 1
    match_bit = shifted_match & mask
    model_index = mask + match_bit + (code >> 8)
    bit = (code >> 7) & 1

    @encoder.queue_bit(@models.literal[literal_base + model_index], bit)

    code <<= 1
    mask &= ~(shifted_match ^ code)
  end
end
|
|
496
|
+
|
|
497
|
+
# Encode rep match length
|
|
498
|
+
#
|
|
499
|
+
# @param length [Integer] Match length (>= 2)
|
|
500
|
+
# @param pos_state [Integer] Position state
|
|
501
|
+
def encode_rep_length(length, pos_state)
  # Rep matches have their own length coder, separate from normal matches.
  rep_coder = @models.rep_len_encoder
  encode_length(rep_coder, length, pos_state)
end
|
|
504
|
+
|
|
505
|
+
# Encode normal match length
|
|
506
|
+
#
|
|
507
|
+
# @param length [Integer] Match length (>= 2)
|
|
508
|
+
# @param pos_state [Integer] Position state
|
|
509
|
+
def encode_match_length(length, pos_state)
  # Normal matches use the match length coder (rep matches use another).
  match_coder = @models.match_len_encoder
  encode_length(match_coder, length, pos_state)
end
|
|
512
|
+
|
|
513
|
+
# Encode length using length encoder
|
|
514
|
+
#
|
|
515
|
+
# @param len_encoder [LengthEncoder] Length encoder
|
|
516
|
+
# @param length [Integer] Match length (2-273)
|
|
517
|
+
# @param pos_state [Integer] Position state
|
|
518
|
+
def encode_length(len_encoder, length, pos_state)
  # Lengths are coded relative to the minimum match length.
  symbol = length - MATCH_LEN_MIN

  # Low range: choice = 0, then the per-pos_state low tree.
  if symbol < LEN_LOW_SYMBOLS
    @encoder.queue_bit(len_encoder.choice, 0)
    return encode_bittree(len_encoder.low[pos_state], NUM_LEN_LOW_BITS, symbol)
  end

  @encoder.queue_bit(len_encoder.choice, 1)
  symbol -= LEN_LOW_SYMBOLS

  if symbol < LEN_MID_SYMBOLS
    # Mid range: choice2 = 0, then the per-pos_state mid tree.
    @encoder.queue_bit(len_encoder.choice2, 0)
    encode_bittree(len_encoder.mid[pos_state], NUM_LEN_MID_BITS, symbol)
  else
    # High range: choice2 = 1, then the single shared high tree.
    @encoder.queue_bit(len_encoder.choice2, 1)
    encode_bittree(len_encoder.high, NUM_LEN_HIGH_BITS,
                   symbol - LEN_MID_SYMBOLS)
  end
end
|
|
539
|
+
|
|
540
|
+
# Encode distance
|
|
541
|
+
#
|
|
542
|
+
# @param distance [Integer] Distance (0-based)
|
|
543
|
+
# @param length [Integer] Match length
|
|
544
|
+
def encode_distance(distance, length)
  dist_slot = get_dist_slot(distance)

  # The slot is coded with a tree chosen by the match length.
  encode_bittree(@models.dist_slot[get_len_to_pos_state(length)],
                 NUM_DIST_SLOT_BITS, dist_slot)

  # Slots below START_POS_MODEL_INDEX carry no extra bits.
  return if dist_slot < START_POS_MODEL_INDEX

  footer_bits = (dist_slot >> 1) - 1
  base = (2 | (dist_slot & 1)) << footer_bits
  remainder = distance - base

  if dist_slot >= END_POS_MODEL_INDEX
    # High bits go out as direct bits; the low DIST_ALIGN_BITS bits use
    # the alignment models, least significant bit first.
    align_mask = (1 << DIST_ALIGN_BITS) - 1
    @encoder.queue_direct_bits(remainder >> DIST_ALIGN_BITS,
                               footer_bits - DIST_ALIGN_BITS)
    encode_bittree_reverse(@models.dist_align, remainder & align_mask,
                           DIST_ALIGN_BITS, 0)
  else
    # The whole remainder is coded with the dist_special models.
    # NOTE(review): XZ Utils uses "base - dist_slot - 1" as the model
    # offset here; confirm that "base - dist_slot" matches how
    # dist_special is laid out and how the decoder indexes it.
    encode_bittree_reverse(@models.dist_special, remainder,
                           footer_bits, base - dist_slot)
  end
end
|
|
572
|
+
|
|
573
|
+
# Encode bittree (MSB first)
|
|
574
|
+
#
|
|
575
|
+
# @param probs [Array<BitModel>] Probability models
|
|
576
|
+
# @param num_bits [Integer] Number of bits
|
|
577
|
+
# @param value [Integer] Value to encode
|
|
578
|
+
def encode_bittree(probs, num_bits, value)
  # Send the low num_bits bits of value, most significant first. `node`
  # starts at 1 and accumulates the bits already sent, selecting the model
  # used for the next bit.
  node = 1
  num_bits.downto(1) do |width|
    bit = value[width - 1]
    @encoder.queue_bit(probs[node], bit)
    node = (node << 1) | bit
  end
end
|
|
586
|
+
|
|
587
|
+
# Encode bittree in reverse (LSB first)
|
|
588
|
+
#
|
|
589
|
+
# @param probs [Array<BitModel>] Probability models
|
|
590
|
+
# @param value [Integer] Value to encode
|
|
591
|
+
# @param num_bits [Integer] Number of bits
|
|
592
|
+
# @param offset [Integer] Probability array offset
|
|
593
|
+
def encode_bittree_reverse(probs, value, num_bits, offset)
  # Send the low num_bits bits of value, least significant first. Models
  # are looked up at offset + node, with node starting at 1.
  node = 1
  num_bits.times do |position|
    bit = value[position]
    @encoder.queue_bit(probs[offset + node], bit)
    node = (node << 1) | bit
  end
end
|
|
601
|
+
|
|
602
|
+
# Get distance slot for distance
|
|
603
|
+
#
|
|
604
|
+
# @param distance [Integer] Distance (0-based)
|
|
605
|
+
# @return [Integer] Distance slot (0-63)
|
|
606
|
+
def get_dist_slot(distance)
  # Distances at or above NUM_FULL_DISTANCES go through the large helper.
  return fast_pos_large(distance) unless distance < NUM_FULL_DISTANCES

  # Distances 0..3 are their own slot; the rest use the small helper.
  return distance if distance < 4

  fast_pos_small(distance)
end
|
|
615
|
+
|
|
616
|
+
# Fast position calculation for small distances
|
|
617
|
+
def fast_pos_small(distance)
  # Values up to 3 are returned unchanged.
  return distance unless distance > 3

  # For larger values the slot is twice the index of the highest set bit,
  # plus the bit just below it.
  top = distance.bit_length - 1
  (top << 1) | ((distance >> (top - 1)) & 1)
end
|
|
627
|
+
|
|
628
|
+
# Fast position calculation for large distances
|
|
629
|
+
def fast_pos_large(distance)
  # Drop the low 6 bits, take the slot of what remains, and add back the
  # 12 slots (two per dropped bit) that the shift removed.
  12 + fast_pos_small(distance >> 6)
end
|
|
633
|
+
|
|
634
|
+
# Map length to position state
|
|
635
|
+
#
|
|
636
|
+
# @param length [Integer] Match length
|
|
637
|
+
# @return [Integer] Position state (0-3)
|
|
638
|
+
def get_len_to_pos_state(length)
  # Offset from the minimum match length, capped at the last state index.
  [length - MATCH_LEN_MIN, NUM_LEN_TO_POS_STATES - 1].min
end
|
|
642
|
+
end
|
|
643
|
+
end
|
|
644
|
+
end
|
|
645
|
+
end
|