omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,750 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require "stringio"
|
|
24
|
+
require_relative "../../base/lzma2_encoder_base"
|
|
25
|
+
require_relative "../../../algorithms/lzma"
|
|
26
|
+
require_relative "../../../algorithms/lzma2/constants"
|
|
27
|
+
require_relative "../../../algorithms/lzma2/lzma2_chunk"
|
|
28
|
+
require_relative "../../../algorithms/lzma2/properties"
|
|
29
|
+
require_relative "../../../algorithms/lzma/dictionary"
|
|
30
|
+
require_relative "../../../algorithms/lzma/lzma_state"
|
|
31
|
+
require_relative "../../../algorithms/lzma/xz_probability_models"
|
|
32
|
+
require_relative "../../../algorithms/lzma/match_finder"
|
|
33
|
+
require_relative "../../../algorithms/lzma/optimal_encoder"
|
|
34
|
+
require_relative "../../../algorithms/lzma/xz_range_encoder_exact"
|
|
35
|
+
require_relative "../../../algorithms/lzma/constants"
|
|
36
|
+
|
|
37
|
+
module Omnizip
|
|
38
|
+
module Implementations
|
|
39
|
+
module XZUtils
|
|
40
|
+
module LZMA2
|
|
41
|
+
# XZ Utils LZMA2 encoder implementation.
|
|
42
|
+
#
|
|
43
|
+
# This is the original XzLZMA2Encoder moved from algorithms/lzma2/xz_lzma2_encoder.rb
|
|
44
|
+
# to the new namespace structure.
|
|
45
|
+
#
|
|
46
|
+
# Ported from XZ Utils liblzma/lzma2_encoder.c
|
|
47
|
+
#
|
|
48
|
+
# Thin wrapper around String#byteslice, kept so that existing callers of
# StringCompat.byteslice keep working.
module StringCompat
  # Returns +length+ bytes of +string+ starting at byte offset +start+.
  #
  # String#byteslice has been part of core Ruby since 1.9.3, so the former
  # respond_to? switch and its bytes/pack fallback could never be selected.
  # That fallback also disagreed with the core method (it returned "" rather
  # than nil for an out-of-range start and always produced a binary string),
  # so it has been removed instead of being kept as dead code.
  #
  # @param string [String] source string
  # @param start [Integer] byte offset of the first byte to return
  # @param length [Integer] number of bytes to return
  # @return [String, nil] the requested bytes, or nil when +start+ lies
  #   beyond the end of the string
  def self.byteslice(string, start, length)
    string.byteslice(start, length)
  end
end
|
|
60
|
+
|
|
61
|
+
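# Module-level numeric constants: the largest unsigned 32-bit value, and REPS (presumably the number of LZMA repeat-distance slots; confirm against usage)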
|
|
62
|
+
UINT32_MAX = 0xFFFFFFFF
|
|
63
|
+
REPS = 4
|
|
64
|
+
|
|
65
|
+
# XZ Utils LZMA2 encoder.
|
|
66
|
+
class Encoder < Base::LZMA2EncoderBase
|
|
67
|
+
include Omnizip::Algorithms::LZMA::Constants
|
|
68
|
+
|
|
69
|
+
# XZ Utils limits (from lzma2_encoder.h)
|
|
70
|
+
# Maximum UNCOMPRESSED size per chunk: 2MB
|
|
71
|
+
UNCOMPRESSED_MAX = 1 << 21 # 2,097,152 bytes
|
|
72
|
+
# Maximum COMPRESSED size per chunk: 64KB
|
|
73
|
+
COMPRESSED_MAX = 1 << 16 # 65,536 bytes
|
|
74
|
+
|
|
75
|
+
# Initialize the encoder
|
|
76
|
+
# @param options [Hash] Encoding options
|
|
77
|
+
# @option options [Integer] :dict_size Dictionary size (default: 8MB)
|
|
78
|
+
# @option options [Integer] :lc Literal context bits (default: 3)
|
|
79
|
+
# @option options [Integer] :lp Literal position bits (default: 0)
|
|
80
|
+
# @option options [Integer] :pb Position bits (default: 2)
|
|
81
|
+
# @option options [Boolean] :standalone If true, write property byte at start (default: true)
|
|
82
|
+
def initialize(options = {})
  # Resolve every tunable from the options hash, falling back to the
  # documented default when the key is absent.
  dict_size  = options.fetch(:dict_size) { 8 * 1024 * 1024 }
  lc         = options.fetch(:lc) { 3 }
  lp         = options.fetch(:lp) { 0 }
  pb         = options.fetch(:pb) { 2 }
  standalone = options.fetch(:standalone) { true }

  base_settings = {
    dict_size: dict_size,
    lc: lc,
    lp: lp,
    pb: pb,
    standalone: standalone,
  }
  super(**base_settings)

  # Encoder state that lives across all chunks of a stream.
  lzma = Omnizip::Algorithms::LZMA
  @dictionary = lzma::Dictionary.new(dict_size)
  @state = lzma::LZMAState.new(0)
  @models = lzma::XzProbabilityModels.new(lc, lp, pb)
  @match_finder = lzma::MatchFinder.new(@dictionary)
  @optimal = lzma::OptimalEncoder.new(mode: :fast)

  # Byte preceding the current position, used as literal context.
  @prev_byte = 0

  # Per-chunk control flags. The first chunk always requests a dictionary
  # reset; the other two start cleared.
  @need_properties = false
  @need_state_reset = false
  @need_dictionary_reset = true
end
|
|
114
|
+
|
|
115
|
+
def encode(input_data)
  # Encode +input_data+ as a complete LZMA2 stream.
  #
  # Output layout: an optional dictionary-size property byte (standalone
  # mode only), one chunk per UNCOMPRESSED_MAX-byte slice of the input,
  # then the 0x00 end marker.
  #
  # @param input_data [String] bytes to compress; left untouched
  # @return [String] the binary LZMA2 stream

  # Start every encoding session with a fresh match finder.
  @match_finder.reset

  output = StringIO.new
  output.set_encoding(Encoding::BINARY)

  # Standalone mode prefixes the stream with the dictionary-size byte.
  output.putc(encode_dict_size(@dict_size)) if @standalone

  # Walk the input by byte offset rather than wrapping it in a StringIO:
  # StringIO#set_encoding re-tags the string it wraps, which would switch
  # the caller's own string to BINARY as a side effect. byteslice returns
  # a fresh string, so re-tagging the slice is safe.
  offset = 0
  total = input_data.bytesize
  while offset < total
    chunk_data = input_data.byteslice(offset, UNCOMPRESSED_MAX)
                           .force_encoding(Encoding::BINARY)
    offset += chunk_data.bytesize

    output.write(encode_chunk(chunk_data).to_bytes)

    # NOTE(review): this clears @need_state_reset unconditionally, which
    # also discards the `true` that encode_chunk sets after emitting an
    # uncompressed chunk - confirm that is intended.
    @need_properties = false
    @need_state_reset = false
    @need_dictionary_reset = false
  end

  # The end marker is written regardless of @standalone, which only
  # controls the leading property byte.
  output.write([0x00].pack("C"))

  output.string
end
|
|
155
|
+
|
|
156
|
+
# Get implementation identifier.
|
|
157
|
+
#
|
|
158
|
+
# @return [Symbol] :xz_utils
|
|
159
|
+
def implementation_name
  # Fixed tag naming this encoder flavour.
  :xz_utils
end
|
|
162
|
+
|
|
163
|
+
private
|
|
164
|
+
|
|
165
|
+
def encode_chunk(uncompressed_data)
  # Build one LZMA2 chunk for +uncompressed_data+.
  #
  # The data is always run through try_compress first. The compressed
  # form is kept only when its payload is strictly smaller than the raw
  # payload (headers are not counted); otherwise the bytes are stored raw.
  # Afterwards the shared dictionary and @prev_byte are advanced.
  #
  # @param uncompressed_data [String] raw bytes of this chunk
  # @return [Omnizip::Algorithms::LZMA2::LZMA2Chunk]
  packed = try_compress(uncompressed_data)
  chunk_class = Omnizip::Algorithms::LZMA2::LZMA2Chunk

  chunk =
    if packed.bytesize < uncompressed_data.bytesize
      # Compression paid off. The property byte packs lc/lp/pb as
      # (pb * 5 + lp) * 9 + lc and is always attached (need_props: true).
      # compressed_size counts every byte try_compress returned,
      # range-coder flush bytes included.
      chunk_class.new(
        chunk_type: :compressed,
        uncompressed_data: uncompressed_data,
        compressed_data: packed,
        compressed_size: packed.bytesize,
        properties: (((@pb * 5) + @lp) * 9) + @lc,
        need_dict_reset: @need_dictionary_reset,
        need_state_reset: @need_state_reset,
        need_props: true,
      )
    else
      # Compression did not help: store the bytes verbatim and flag that
      # the following chunk needs a state reset.
      stored = chunk_class.new(
        chunk_type: :uncompressed,
        uncompressed_data: uncompressed_data,
        compressed_data: "",
        need_dict_reset: @need_dictionary_reset,
        need_state_reset: false,
        need_props: false,
      )
      @need_state_reset = true
      stored
    end

  # Advance the shared dictionary once per chunk.
  @dictionary.append(uncompressed_data)

  # Remember the final byte as literal context for the next chunk.
  @prev_byte = uncompressed_data.getbyte(-1) unless uncompressed_data.empty?

  chunk
end
|
|
218
|
+
|
|
219
|
+
def try_compress(data)
  # Range-encode +data+ and return every byte produced (flush bytes
  # included) as a binary String. The caller decides whether the result
  # is worth keeping.
  #
  # @param data [String] raw bytes of one chunk
  # @return [String] range-coder output
  sink = StringIO.new
  sink.set_encoding(Encoding::BINARY)
  encoder = Omnizip::Algorithms::LZMA::XzRangeEncoder.new(sink)

  # Hand the whole chunk to the match finder before encoding anything.
  @match_finder.feed(data)

  # Pre-index positions up to (dictionary size + chunk size - 2), clamped
  # at zero for tiny inputs. The literal 2 is the minimum match length.
  min_match_len = 2
  skip_target = [@dictionary.buffer.bytesize + data.bytesize - min_match_len, 0].max
  @match_finder.skip(skip_target)

  # Offset of this chunk inside the match finder's buffer; encode_literal
  # reads it back through @current_start_pos.
  start_pos = @dictionary.buffer.bytesize
  @current_start_pos = start_pos

  pos = 0
  until pos >= data.bytesize
    # Drain the symbol queue once it holds more than 20 entries so a
    # single operation cannot overflow it.
    encode_queued_symbols(encoder, sink) if encoder.count > 20

    match_pos = start_pos + pos
    @match_finder.find_matches(match_pos)

    distance, length = @optimal.find_optimal(
      match_pos, @match_finder, @state, @state.reps, @models
    )

    puts "[DEBUG] pos=#{pos} distance=#{distance} length=#{length} state=#{@state.value} reps=#{@state.reps.inspect}" if ENV["DEBUG"]

    # UINT32_MAX (not zero) marks a literal: distance 0 means rep0.
    consumed =
      if distance == UINT32_MAX || length == 1
        encode_literal(data.getbyte(pos), encoder, pos)
        1
      elsif distance < REPS
        # distance 0..3 selects rep0..rep3
        encode_repeated_match(distance, length, encoder, pos, match_pos)
        length
      else
        # normal match: strip the REPS offset to get the real distance
        encode_match(distance - REPS, length, encoder, pos, match_pos, data)
        length
      end
    pos += consumed
  end

  # Emit whatever is still queued, then queue and emit the flush symbols.
  encode_queued_symbols(encoder, sink)
  encoder.queue_flush
  encode_queued_symbols(encoder, sink)

  full_output = sink.string

  puts "[DEBUG] try_compress: full_output.size=#{full_output.bytesize}, encoder.out_total=#{encoder.out_total}" if ENV["DEBUG_FLUSH"]

  full_output
end
|
|
314
|
+
|
|
315
|
+
# Encode queued symbols to output
|
|
316
|
+
# rubocop:disable Style/CollectionQuerying
|
|
317
|
+
def encode_queued_symbols(encoder, output)
  # Run the range coder over everything currently queued on +encoder+
  # and append the produced bytes to +output+.
  #
  # This is the single definition of the method. An earlier duplicate
  # that referenced its scratch buffer before creating it (and encoded
  # twice) was dead code shadowed by this one and has been removed.
  #
  # @param encoder [#count, #encode_symbols] range encoder with queued symbols
  # @param output [StringIO] destination stream
  # @return [Integer, nil] number of bytes appended, or nil when nothing
  #   was queued
  return if encoder.count.zero?

  # Scratch buffer the encoder writes into; out_pos reports how many
  # bytes of it were actually filled.
  capacity = 10_000
  temp_buffer = "\0" * capacity
  out_pos = Omnizip::Algorithms::LZMA::IntRef.new(0)

  size_before = output.size
  encoder.encode_symbols(temp_buffer, out_pos, capacity)

  # Copy exactly the filled prefix, counted in bytes.
  if out_pos.value.positive?
    output.write(StringCompat.byteslice(temp_buffer, 0, out_pos.value))
  end

  output.size - size_before
end
|
|
367
|
+
|
|
368
|
+
# Encode literal byte
|
|
369
|
+
def encode_literal(symbol, encoder, pos)
  # Queue the bits for one literal byte.
  #
  # @param symbol [Integer] byte value to encode
  # @param encoder [#queue_bit] range encoder
  # @param pos [Integer] position of the byte inside the current chunk
  pos_mask = (1 << @pb) - 1

  # is_match = 0 announces a literal; the probability slot is picked with
  # the state value from BEFORE the literal update.
  encoder.queue_bit(@models.is_match[@state.value][pos & pos_mask], 0)

  # Base offset of the literal subcoder, derived from position and the
  # previous byte.
  subcoder_base = get_literal_state(pos, @prev_byte)

  # Decide between the two literal paths on the current state, and only
  # then advance the state.
  after_match = @state.use_matched_literal?
  @state.update_literal!

  # On the matched path, look up the byte rep0 points at in the match
  # finder's buffer; it stays nil if the index falls outside the buffer.
  match_byte = nil
  if after_match
    index = @current_start_pos + pos - @state.reps[0] - 1
    if index >= 0 && index < @match_finder.buffer.bytesize
      match_byte = @match_finder.buffer.getbyte(index)
    end
  end

  if match_byte
    encode_matched_literal(subcoder_base, match_byte, symbol, encoder)
  else
    # Plain 8-bit tree; also the fallback when no match byte was found.
    encode_normal_literal(subcoder_base, symbol, encoder)
  end

  @prev_byte = symbol
end
|
|
411
|
+
|
|
412
|
+
# Encode normal match
|
|
413
|
+
def encode_match(distance, length, encoder, pos, match_pos, _input_data)
  # Queue the bits for a normal (non-repeated) match.
  #
  # @param distance [Integer] match distance with the REPS offset removed
  # @param length [Integer] match length
  # @param encoder [#queue_bit] range encoder
  # @param pos [Integer] position inside the current chunk
  # @param match_pos [Integer] same position inside the match finder buffer
  # @param _input_data [String] unused
  pos_state = pos & ((1 << @pb) - 1)

  # is_match = 1 and is_rep = 0, both indexed by the state value from
  # BEFORE the match update.
  encoder.queue_bit(@models.is_match[@state.value][pos_state], 1)
  encoder.queue_bit(@models.is_rep[@state.value], 0)

  # The state (and rep history) is advanced before length and distance
  # are queued.
  @state.update_match!(distance)

  encode_match_length(length, pos_state, encoder)
  encode_distance(distance, length, encoder)

  # The literal context for the next symbol is the last byte this match
  # produced, i.e. the byte at match_pos + length - 1. Reading it at the
  # destination (rather than computing a source offset from +distance+)
  # stays correct whether distances are 0-based or 1-based.
  last_byte_pos = match_pos + length - 1
  history = @match_finder.buffer
  @prev_byte = history.getbyte(last_byte_pos) if last_byte_pos >= 0 && last_byte_pos < history.bytesize
end
|
|
439
|
+
|
|
440
|
+
# Encode repeated match (using rep0-rep3)
|
|
441
|
+
# Ported from XZ Utils rep_match function
|
|
442
|
+
def encode_repeated_match(rep, length, encoder, pos, match_pos)
  # Queue the bits for a repeated match that reuses one of the four most
  # recent distances, and rotate the rep history accordingly.
  #
  # @param rep [Integer] which history slot to reuse (0..3)
  # @param length [Integer] match length (1 means a short rep)
  # @param encoder [#queue_bit] range encoder
  # @param pos [Integer] position inside the current chunk
  # @param match_pos [Integer] same position inside the match finder buffer
  # @raise [RuntimeError] if the selected history slot is nil
  pos_state = pos & ((1 << @pb) - 1)
  state_index = @state.value
  reps = @state.reps

  # is_match = 1, is_rep = 1 (indexed by the pre-update state value).
  encoder.queue_bit(@models.is_match[state_index][pos_state], 1)
  encoder.queue_bit(@models.is_rep[state_index], 1)

  if rep.zero?
    # rep0: the history is left as is; a second bit tells short rep
    # (length 1) from long rep.
    encoder.queue_bit(@models.is_rep0[state_index], 0)
    encoder.queue_bit(@models.is_rep0_long[state_index][pos_state], length == 1 ? 0 : 1)
  else
    # Read the selected distance BEFORE shifting the history. Reading it
    # afterwards (as the previous code did) picks up a slot that has
    # already been overwritten, so the wrong distance is promoted.
    distance = reps[rep]
    raise "Distance is nil for rep #{rep}, reps=#{reps.inspect}" if distance.nil?

    encoder.queue_bit(@models.is_rep0[state_index], 1)

    if rep == 1
      encoder.queue_bit(@models.is_rep1[state_index], 0)
    else
      encoder.queue_bit(@models.is_rep1[state_index], 1)
      encoder.queue_bit(@models.is_rep2[state_index], rep - 2)

      reps[3] = reps[2] if rep == 3
      reps[2] = reps[1]
    end

    # Move the selected distance to the front.
    reps[1] = reps[0]
    reps[0] = distance
  end

  if length == 1
    @state.update_short_rep!
  else
    # NOTE(review): XZ Utils codes rep-match lengths with a separate
    # rep_len_encoder model set; this goes through encode_match_length,
    # which uses match_len_encoder - confirm against the models class.
    encode_match_length(length, pos_state, encoder)
    @state.update_long_rep!
  end

  # Literal context for the next symbol: the last byte this match
  # produced, read at its destination position.
  last_byte_pos = match_pos + length - 1
  history = @match_finder.buffer
  @prev_byte = history.getbyte(last_byte_pos) if last_byte_pos >= 0 && last_byte_pos < history.bytesize
end
|
|
513
|
+
|
|
514
|
+
# Get literal subcoder flat index
|
|
515
|
+
# Ported from XZ Utils literal_subcoder macro in lzma_common.h:
|
|
516
|
+
# #define literal_subcoder(probs, lc, literal_mask, pos, prev_byte) \
|
|
517
|
+
# ((probs) + UINT32_C(3) * \
|
|
518
|
+
# (((((pos) << 8) + (prev_byte)) & (literal_mask)) << (lc)))
|
|
519
|
+
# where literal_mask = (0x100 << lp) - (0x100 >> lc)
|
|
520
|
+
#
|
|
521
|
+
# Returns the flat index into the literal probability array.
|
|
522
|
+
# The literal array is now a flat array (matching XZ Utils) with
|
|
523
|
+
# size 0x300 << (lc + lp), not a 2D array.
|
|
524
|
+
def get_literal_state(pos, prev_byte)
  # Base offset of the literal subcoder for this position / previous byte
  # inside the flat literal probability array.
  #
  # Each subcoder spans 0x300 entries (0x100 for the plain tree plus
  # 0x200 for the matched-literal tree), so the context number - which
  # after the shift is already a multiple of 0x100 - has to be scaled by
  # 3, as in the literal_subcoder macro. Without that factor neighbouring
  # subcoders overlap.
  #
  # @param pos [Integer] position of the literal
  # @param prev_byte [Integer] byte preceding the literal
  # @return [Integer] offset to add to an index within the subcoder
  literal_mask = (0x100 << @lp) - (0x100 >> @lc)
  context = (((pos << 8) + prev_byte) & literal_mask) << @lc
  3 * context
end
|
|
528
|
+
|
|
529
|
+
# Get byte from dictionary at distance back
|
|
530
|
+
def get_dictionary_byte(distance)
  # Look up the byte +distance+ positions back in the dictionary.
  # Returns nil unless 1 <= distance <= current dictionary size.
  return nil unless distance.positive?
  return nil if distance > @dictionary.buffer.bytesize

  @dictionary.get_byte(distance)
end
|
|
536
|
+
|
|
537
|
+
# Encode normal literal (8-bit tree)
|
|
538
|
+
# Ported from XZ Utils rc_bittree() for normal literals
|
|
539
|
+
# @param literal_offset [Integer] Base offset into flat literal array
|
|
540
|
+
# @param symbol [Integer] The literal byte to encode (0-255)
|
|
541
|
+
# @param encoder [XZBufferedRangeEncoder] The range encoder
|
|
542
|
+
def encode_normal_literal(literal_offset, symbol, encoder)
  # Walk the 8-level literal bit tree from the most significant bit down,
  # queueing one bit per level. +node+ is the tree index built so far.
  node = 1
  8.downto(1) do |level|
    bit = symbol[level - 1]
    encoder.queue_bit(@models.literal[literal_offset + node], bit)
    node = (node << 1) | bit
  end
end
|
|
550
|
+
|
|
551
|
+
# Encode matched literal (compare with match byte)
|
|
552
|
+
# Ported from XZ Utils literal_matched() in lzma_encoder.c
|
|
553
|
+
# @param literal_offset [Integer] Base offset into flat literal array
|
|
554
|
+
# @param match_byte [Integer] The match byte to compare against
|
|
555
|
+
# @param symbol [Integer] The literal byte to encode (0-255)
|
|
556
|
+
# @param encoder [XZBufferedRangeEncoder] The range encoder
|
|
557
|
+
def encode_matched_literal(literal_offset, match_byte, symbol, encoder)
  # Literal coding after a match: each bit of the literal is coded in a
  # slot that also depends on the corresponding bit of +match_byte+, for
  # as long as the two agree (tracked by +mask+).
  mask = 0x100
  shifted_match = match_byte
  tracker = symbol + 0x100 # marker bit above the 8 data bits

  # The marker bit reaches 0x10000 after exactly eight shifts.
  until tracker >= 0x10000
    shifted_match <<= 1
    match_bit = shifted_match & mask
    slot = mask + match_bit + (tracker >> 8)
    bit = (tracker >> 7) & 1

    encoder.queue_bit(@models.literal[literal_offset + slot], bit)

    tracker <<= 1
    # Clears the mask once the literal and match bits differ.
    mask &= ~(shifted_match ^ tracker)
  end
end
|
|
575
|
+
|
|
576
|
+
# Encode match length
|
|
577
|
+
def encode_match_length(length, pos_state, encoder)
  # Queue a match length. Lengths are split into three bands (low / mid /
  # high); one or two choice bits select the band, then the offset inside
  # the band is coded with that band's bit tree.
  remaining = length - MATCH_LEN_MIN

  if remaining < LEN_LOW_SYMBOLS
    encoder.queue_bit(@models.match_len_encoder.choice, 0)
    return encode_bittree(@models.match_len_encoder.low[pos_state],
                          NUM_LEN_LOW_BITS, remaining, encoder)
  end

  if remaining < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS
    encoder.queue_bit(@models.match_len_encoder.choice, 1)
    encoder.queue_bit(@models.match_len_encoder.choice2, 0)
    return encode_bittree(@models.match_len_encoder.mid[pos_state],
                          NUM_LEN_MID_BITS, remaining - LEN_LOW_SYMBOLS, encoder)
  end

  encoder.queue_bit(@models.match_len_encoder.choice, 1)
  encoder.queue_bit(@models.match_len_encoder.choice2, 1)
  encode_bittree(@models.match_len_encoder.high, NUM_LEN_HIGH_BITS,
                 remaining - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS, encoder)
end
|
|
612
|
+
|
|
613
|
+
# Encode distance using slot encoding
|
|
614
|
+
def encode_distance(distance, length, encoder)
  # Queue a match distance: first its slot (bit tree selected by the
  # length class), then - for slots at or above START_POS_MODEL_INDEX -
  # the footer bits that pin down the exact distance inside the slot.
  dist_slot = get_dist_slot(distance)
  len_state = get_len_to_pos_state(length)

  encode_bittree(@models.dist_slot[len_state], NUM_DIST_SLOT_BITS, dist_slot, encoder)

  # Slots below START_POS_MODEL_INDEX carry no footer.
  return if dist_slot < START_POS_MODEL_INDEX

  footer_bits = (dist_slot >> 1) - 1
  base = (2 | (dist_slot & 1)) << footer_bits
  dist_reduced = distance - base

  if dist_slot < END_POS_MODEL_INDEX
    # Footer coded entirely with probability models (reverse bit tree at
    # offset base - dist_slot - 1 inside dist_special).
    encode_bittree_reverse(@models.dist_special, dist_reduced, footer_bits,
                           base - dist_slot - 1, encoder)
  else
    # Upper footer bits go out as direct bits; the lowest DIST_ALIGN_BITS
    # use the alignment reverse bit tree.
    encoder.queue_direct_bits(dist_reduced >> DIST_ALIGN_BITS,
                              footer_bits - DIST_ALIGN_BITS)
    low_bits = dist_reduced & ((1 << DIST_ALIGN_BITS) - 1)
    encode_bittree_reverse(@models.dist_align, low_bits, DIST_ALIGN_BITS, 0, encoder)
  end
end
|
|
661
|
+
|
|
662
|
+
# Encode bittree (MSB first)
|
|
663
|
+
def encode_bittree(probs, num_bits, value, encoder)
  # Code the low +num_bits+ bits of +value+ through a bit tree, most
  # significant bit first. +node+ is the index of the current tree node.
  node = 1
  num_bits.downto(1) do |level|
    bit = value[level - 1]
    encoder.queue_bit(probs[node], bit)
    node = (node << 1) | bit
  end
end
|
|
671
|
+
|
|
672
|
+
# Encode bittree in reverse (LSB first)
|
|
673
|
+
def encode_bittree_reverse(probs, value, num_bits, offset, encoder)
  # Code the low +num_bits+ bits of +value+ through a bit tree, least
  # significant bit first. Tree nodes live at probs[offset + node].
  node = 1
  num_bits.times do |shift|
    bit = value[shift]
    encoder.queue_bit(probs[offset + node], bit)
    node = (node << 1) | bit
  end
end
|
|
681
|
+
|
|
682
|
+
# Get distance slot for distance
|
|
683
|
+
def get_dist_slot(distance)
  # Map a distance to its slot number. Distances of NUM_FULL_DISTANCES or
  # more take the large-distance path; distances below 4 are their own
  # slot; everything in between takes the small-distance path.
  return fast_pos_large(distance) unless distance < NUM_FULL_DISTANCES
  return distance if distance < 4

  fast_pos_small(distance)
end
|
|
690
|
+
|
|
691
|
+
# Fast position calculation for small distances
|
|
692
|
+
def fast_pos_small(distance)
  # Halve the distance until only its top two bits are left (value <= 3);
  # every halving contributes 2 to the slot number.
  halvings = 0
  remainder = distance
  until remainder <= 3
    remainder >>= 1
    halvings += 1
  end
  (halvings * 2) + remainder
end
|
|
701
|
+
|
|
702
|
+
# Fast position calculation for large distances
|
|
703
|
+
def fast_pos_large(distance)
  # Index of the highest set bit, probing from bit 31 downwards; -1 when
  # no probed bit is set.
  top_bit = 31.downto(0).find { |bit| (distance >> bit) != 0 } || -1

  # slot = 2 * top_bit + (the bit just below the top one)
  (top_bit << 1) + ((distance >> (top_bit - 1)) & 1)
end
|
|
714
|
+
|
|
715
|
+
# Map length to position state
|
|
716
|
+
def get_len_to_pos_state(length)
  # Length class used to pick the distance-slot tree: the length above
  # MATCH_LEN_MIN, capped at the last class.
  [length - MATCH_LEN_MIN, NUM_LEN_TO_POS_STATES - 1].min
end
|
|
720
|
+
|
|
721
|
+
# Encode dictionary size to LZMA2 property byte
|
|
722
|
+
# @param dict_size [Integer] Dictionary size
|
|
723
|
+
# @return [Integer] Property byte (0-40)
|
|
724
|
+
def encode_dict_size(dict_size)
  # Property byte p (0..39) stands for a dictionary of
  # (2 | (p & 1)) << (p / 2 + 11) bytes, i.e. alternating 2^n and
  # 2^n + 2^(n-1); 40 is the largest value this method returns.
  #
  # The request is clamped up to DICT_SIZE_MIN, then the SMALLEST
  # encodable size that is >= the request is chosen, so the advertised
  # dictionary is never smaller than the one asked for. (The previous
  # code returned the odd encoding for every non-power-of-two size, which
  # under-declares sizes above 2^n + 2^(n-1).)
  d = [dict_size, Omnizip::Algorithms::LZMA2Const::DICT_SIZE_MIN].max

  (0...40).find { |prop| ((2 | (prop & 1)) << ((prop >> 1) + 11)) >= d } || 40
end
|
|
746
|
+
end
|
|
747
|
+
end
|
|
748
|
+
end
|
|
749
|
+
end
|
|
750
|
+
end
|