omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "constants"
|
|
24
|
+
require_relative "bit_model"
|
|
25
|
+
|
|
26
|
+
module Omnizip
|
|
27
|
+
module Algorithms
|
|
28
|
+
class LZMA < Algorithm
|
|
29
|
+
# SDK-compatible length encoder/decoder
|
|
30
|
+
#
|
|
31
|
+
# This class implements the LZMA SDK's length encoding scheme:
|
|
32
|
+
# - Lengths 0-7: choice=0, 3 bits from low tree
|
|
33
|
+
# - Lengths 8-15: choice=1, choice2=0, 3 bits from mid tree
|
|
34
|
+
# - Lengths 16+: choice=1, choice2=1, 8 bits from high tree
|
|
35
|
+
#
|
|
36
|
+
# Position state is used to select which low/mid tree to use,
|
|
37
|
+
# providing context-dependent compression.
|
|
38
|
+
class LengthCoder
|
|
39
|
+
include Constants
|
|
40
|
+
|
|
41
|
+
# Initialize the length coder
|
|
42
|
+
#
|
|
43
|
+
# @param num_pos_states [Integer] Number of position states (1 << pb)
|
|
44
|
+
def initialize(num_pos_states)
  # Allocates one bit tree: 2^(bit_width + 1) independent BitModel
  # slots, each slot its own freshly constructed object.
  allocate_tree = lambda do |bit_width|
    Array.new(1 << (bit_width + 1)) { BitModel.new }
  end

  @num_pos_states = num_pos_states

  # Selector bits: @choice separates the low tree from mid/high,
  # @choice2 separates the mid tree from the high tree.
  @choice = BitModel.new
  @choice2 = BitModel.new

  # Low and mid trees exist once per position state, so the
  # position state picks which tree instance is used.
  @low = Array.new(num_pos_states) { allocate_tree.call(NUM_LEN_LOW_BITS) }
  @mid = Array.new(num_pos_states) { allocate_tree.call(NUM_LEN_MID_BITS) }

  # The high tree is a single instance shared by every position state.
  @high = allocate_tree.call(NUM_LEN_HIGH_BITS)
end
|
|
65
|
+
|
|
66
|
+
# Encode a match length using SDK-compatible encoding
|
|
67
|
+
#
|
|
68
|
+
# @param range_encoder [RangeEncoder] The range encoder
|
|
69
|
+
# @param length [Integer] Length value (already subtracted MATCH_LEN_MIN)
|
|
70
|
+
# @param pos_state [Integer] Position state for tree selection
|
|
71
|
+
# @return [void]
|
|
72
|
+
# Encode a match length as choice bits followed by a bit-tree symbol.
#
# Lengths below LEN_LOW_SYMBOLS go to the low tree of the given position
# state (choice=0). The next LEN_MID_SYMBOLS values go to the mid tree of
# that position state (choice=1, choice2=0). Everything else goes to the
# shared high tree (choice=1, choice2=1).
#
# When both LZMA_DEBUG_ENCODE and TRACE_LENGTH_CODER are set in the
# environment, a step-by-step trace is written with puts.
#
# @param range_encoder [RangeEncoder] Receives encode_bit(model, bit) calls
# @param length [Integer] Length value (already subtracted MATCH_LEN_MIN)
# @param pos_state [Integer] Index of the low/mid tree to use
# @return [nil]
def encode(range_encoder, length, pos_state)
  tracing = ENV.fetch("LZMA_DEBUG_ENCODE", nil) && ENV.fetch("TRACE_LENGTH_CODER", nil)
  mid_limit = LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS

  if tracing
    puts " [LengthCoder.encode] START: length=#{length}, pos_state=#{pos_state}"
    puts " @choice.prob=#{@choice.probability} @choice2.prob=#{@choice2.probability}"
  end

  if length < LEN_LOW_SYMBOLS
    # Low tree: a single choice bit of 0
    if tracing
      puts " Using LOW tree (length #{length} < #{LEN_LOW_SYMBOLS})"
      puts " Encoding choice=0 with prob=#{@choice.probability}"
    end
    range_encoder.encode_bit(@choice, 0)
    puts " After choice: @choice.prob=#{@choice.probability}" if tracing

    encode_tree(range_encoder, @low[pos_state], length, NUM_LEN_LOW_BITS)
  elsif length < mid_limit
    # Mid tree: choice=1 then choice2=0
    if tracing
      puts " Using MID tree (length #{length} < #{LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS})"
      puts " Encoding choice=1 with prob=#{@choice.probability}"
    end
    range_encoder.encode_bit(@choice, 1)
    if tracing
      puts " After choice: @choice.prob=#{@choice.probability}"
      puts " Encoding choice2=0 with prob=#{@choice2.probability}"
    end
    range_encoder.encode_bit(@choice2, 0)
    puts " After choice2: @choice2.prob=#{@choice2.probability}" if tracing

    encode_tree(range_encoder, @mid[pos_state], length - LEN_LOW_SYMBOLS, NUM_LEN_MID_BITS)
  else
    # High tree: choice=1 then choice2=1
    if tracing
      puts " Using HIGH tree (length #{length} >= #{LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS})"
      puts " Encoding choice=1 with prob=#{@choice.probability}"
    end
    range_encoder.encode_bit(@choice, 1)
    if tracing
      puts " After choice: @choice.prob=#{@choice.probability}"
      puts " Encoding choice2=1 with prob=#{@choice2.probability}"
    end
    range_encoder.encode_bit(@choice2, 1)
    puts " After choice2: @choice2.prob=#{@choice2.probability}" if tracing

    encode_tree(range_encoder, @high, length - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS, NUM_LEN_HIGH_BITS)
  end

  if tracing
    puts " FINAL @choice.prob=#{@choice.probability} @choice2.prob=#{@choice2.probability}"
    puts " [LengthCoder.encode] END"
  end

  nil
end
|
|
134
|
+
|
|
135
|
+
# Decode a match length using SDK-compatible decoding
|
|
136
|
+
#
|
|
137
|
+
# @param range_decoder [RangeDecoder] The range decoder
|
|
138
|
+
# @param pos_state [Integer] Position state for tree selection
|
|
139
|
+
# @return [Integer] Decoded length value (before adding MATCH_LEN_MIN)
|
|
140
|
+
# Decode a match length written by #encode.
#
# Reads the choice bit, and if it is 1 the choice2 bit, to select the
# low, mid or high tree. It then decodes the tree symbol and adds the
# offset of the selected range (0, LEN_LOW_SYMBOLS, or
# LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS).
#
# When both LZMA_DEBUG_DISTANCE and TRACE_LENGTH_CODER are set in the
# environment, a trace is written with puts.
#
# @param range_decoder [RangeDecoder] Provides decode_bit(model)
# @param pos_state [Integer] Index of the low/mid tree to use
# @return [Integer] Decoded length value (before adding MATCH_LEN_MIN)
def decode(range_decoder, pos_state)
  trace_decode = ENV.fetch("LZMA_DEBUG_DISTANCE", nil) && ENV.fetch("TRACE_LENGTH_CODER", nil)

  if trace_decode
    # caller_locations yields nil or an empty array when the stack has
    # fewer than two caller frames; tracing must not crash in that case.
    caller_loc = caller_locations(2, 1)&.first
    puts " [LengthCoder.decode] START: pos_state=#{pos_state}"
    puts " self.object_id=#{object_id}"
    puts " @choice.object_id=#{@choice.object_id} prob=#{@choice.probability}"
    puts " @choice2.object_id=#{@choice2.object_id} prob=#{@choice2.probability}"
    puts " Called from: #{caller_loc.label} at #{caller_loc.lineno}" if caller_loc
  end

  choice_bit = range_decoder.decode_bit(@choice)
  if trace_decode
    puts " Decoded choice=#{choice_bit} with prob=#{@choice.probability}"
    puts " After choice decode: @choice.prob=#{@choice.probability}"
  end

  if choice_bit.zero?
    # Low tree
    puts " Using LOW tree" if trace_decode
    result = decode_tree(range_decoder, @low[pos_state], NUM_LEN_LOW_BITS)
  elsif range_decoder.decode_bit(@choice2).zero?
    # Mid tree
    if trace_decode
      puts " Decoded choice2=0 with prob=#{@choice2.probability}"
      puts " After choice2 decode: @choice2.prob=#{@choice2.probability}"
      puts " Using MID tree"
    end
    result = LEN_LOW_SYMBOLS +
             decode_tree(range_decoder, @mid[pos_state], NUM_LEN_MID_BITS)
  else
    # High tree
    if trace_decode
      puts " Decoded choice2=1 with prob=#{@choice2.probability}"
      puts " After choice2 decode: @choice2.prob=#{@choice2.probability}"
      puts " Using HIGH tree"
    end
    result = LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS +
             decode_tree(range_decoder, @high, NUM_LEN_HIGH_BITS)
  end

  if trace_decode
    puts " FINAL @choice.prob=#{@choice.probability} @choice2.prob=#{@choice2.probability}"
    puts " Result: length_encoded=#{result}"
    puts " [LengthCoder.decode] END"
  end

  result
end
|
|
192
|
+
|
|
193
|
+
# Reset probability models to initial values
|
|
194
|
+
#
|
|
195
|
+
# Called during state reset (control >= 0xA0) to reset the length
|
|
196
|
+
# coder's probability models. This matches XZ Utils behavior.
|
|
197
|
+
#
|
|
198
|
+
# @return [void]
|
|
199
|
+
# Reset every probability model owned by this coder.
#
# Calls reset on both choice models, on every model of every low and mid
# tree, and on every model of the high tree. When TRACE_RESET_MODELS is
# set in the environment, the choice probabilities before and after the
# reset are written with puts.
#
# @return [nil]
def reset_models
  tracing = ENV["TRACE_RESET_MODELS"]

  if tracing
    puts " [LengthCoder.reset_models] CALLED!"
    puts " Before reset: @choice.prob=#{@choice.probability} @choice2.prob=#{@choice2.probability}"
    # caller_locations yields nil or an empty array when the stack has
    # fewer than two caller frames; skip the line instead of crashing.
    caller_loc = caller_locations(2, 1)&.first
    puts " Called from: #{caller_loc.label} at #{caller_loc.path}:#{caller_loc.lineno}" if caller_loc
  end

  @choice.reset
  @choice2.reset
  @low.each { |state_models| state_models.each(&:reset) }
  @mid.each { |state_models| state_models.each(&:reset) }
  @high.each(&:reset)

  puts " After reset: @choice.prob=#{@choice.probability} @choice2.prob=#{@choice2.probability}" if tracing
end
|
|
222
|
+
|
|
223
|
+
private
|
|
224
|
+
|
|
225
|
+
# Encode a value using a tree of bit models
|
|
226
|
+
#
|
|
227
|
+
# @param range_encoder [RangeEncoder] The range encoder
|
|
228
|
+
# @param models [Array<BitModel>] Array of bit models for the tree
|
|
229
|
+
# @param symbol [Integer] Symbol to encode
|
|
230
|
+
# @param num_bits [Integer] Number of bits in the tree
|
|
231
|
+
# @return [void]
|
|
232
|
+
# Write +symbol+ through a binary tree of bit models, most significant
# bit first.
#
# The walk starts at node 1; after each bit the node index becomes
# (node << 1) | bit, so the model used for a bit depends on all bits
# already written.
#
# @param range_encoder [RangeEncoder] Receives encode_bit(model, bit) calls
# @param models [Array<BitModel>] Models of the tree, indexed by node
# @param symbol [Integer] Value to encode
# @param num_bits [Integer] Number of bits to write
# @return [void]
def encode_tree(range_encoder, models, symbol, num_bits)
  node = 1
  (num_bits - 1).step(0, -1) do |shift|
    current_bit = (symbol >> shift) & 1
    range_encoder.encode_bit(models[node], current_bit)
    node = (node << 1) | current_bit
  end
end
|
|
240
|
+
|
|
241
|
+
# Decode a value using a tree of bit models
|
|
242
|
+
#
|
|
243
|
+
# @param range_decoder [RangeDecoder] The range decoder
|
|
244
|
+
# @param models [Array<BitModel>] Array of bit models for the tree
|
|
245
|
+
# @param num_bits [Integer] Number of bits in the tree
|
|
246
|
+
# @return [Integer] Decoded symbol
|
|
247
|
+
# Read a value through a binary tree of bit models, most significant bit
# first.
#
# The walk starts at node 1; after each bit the node index becomes
# (node << 1) | bit. Each decoded bit is placed at its position in the
# returned value.
#
# @param range_decoder [RangeDecoder] Provides decode_bit(model)
# @param models [Array<BitModel>] Models of the tree, indexed by node
# @param num_bits [Integer] Number of bits to read
# @return [Integer] Decoded symbol
def decode_tree(range_decoder, models, num_bits)
  node = 1
  value = 0
  (0...num_bits).reverse_each do |shift|
    decoded = range_decoder.decode_bit(models[node])
    node = (node << 1) | decoded
    value |= decoded << shift
  end
  value
end
|
|
257
|
+
end
|
|
258
|
+
end
|
|
259
|
+
end
|
|
260
|
+
end
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "constants"
|
|
24
|
+
|
|
25
|
+
module Omnizip
|
|
26
|
+
module Algorithms
|
|
27
|
+
class LZMA < Algorithm
|
|
28
|
+
# Literal byte decoder
|
|
29
|
+
#
|
|
30
|
+
# This class is responsible for decoding literal bytes using
|
|
31
|
+
# probability models. It supports two modes matching the encoder:
|
|
32
|
+
#
|
|
33
|
+
# 1. Unmatched mode: Simple 8-bit decoding
|
|
34
|
+
# 2. Matched mode: Uses match byte for context (SDK feature)
|
|
35
|
+
#
|
|
36
|
+
# The decoder must perfectly mirror the encoder's decisions
|
|
37
|
+
# about which probability models to use.
|
|
38
|
+
#
|
|
39
|
+
# Single Responsibility: Literal byte decoding only
|
|
40
|
+
#
|
|
41
|
+
# @example Unmatched decoding
|
|
42
|
+
# decoder = LiteralDecoder.new
|
|
43
|
+
# byte = decoder.decode_unmatched(lit_state, lc, range_decoder, models)
|
|
44
|
+
#
|
|
45
|
+
# @example Matched decoding (SDK mode)
|
|
46
|
+
# decoder = LiteralDecoder.new
|
|
47
|
+
# byte = decoder.decode_matched(match_byte, lit_state, lc, range_decoder, models)
|
|
48
|
+
class LiteralDecoder
|
|
49
|
+
include Constants
|
|
50
|
+
|
|
51
|
+
# Decode literal byte in unmatched mode
|
|
52
|
+
#
|
|
53
|
+
# This is the standard LZMA literal decoding where each bit
|
|
54
|
+
# is decoded using probability models based on the partial
|
|
55
|
+
# symbol value.
|
|
56
|
+
#
|
|
57
|
+
# @param lit_state [Integer] Literal context value (0-7 for lc=3, unshifted)
|
|
58
|
+
# @param lc [Integer] Literal context bits; lit_state is shifted left by this amount to locate the model block
|
|
59
|
+
# @param range_decoder [RangeDecoder] Range decoder instance
|
|
60
|
+
# @param models [Array<BitModel>] Literal probability models
|
|
61
|
+
# @return [Integer] Decoded byte value (0-255)
|
|
62
|
+
# Decode one literal byte without match-byte context.
#
# The model block for this literal starts at 3 * (lit_state << lc). The
# symbol starts at 1 and is extended by one decoded bit per step until it
# reaches 0x100; at each step the model at base offset + symbol is used.
#
# Diagnostics (all optional, controlled by the environment):
# - TRACE_LITERAL_DECODE with lit_state == 0 prints the final result, and
#   additionally each bit when LZMA_DEBUG_BITS is also set.
# - LZMA_DEBUG_BITS with lit_state == 96 prints each bit (leftover hook
#   for one specific investigated literal).
#
# @param lit_state [Integer] Literal context value, before the lc shift
# @param lc [Integer] Shift applied to lit_state when locating the models
# @param range_decoder [RangeDecoder] Provides decode_bit(model)
# @param models [Array<BitModel>] Literal probability models
# @return [Integer] Decoded byte value (0-255)
def decode_unmatched(lit_state, lc, range_decoder, models)
  # Shift before multiplying by 3
  base_offset = 3 * (lit_state << lc)

  # Resolve the trace switches once instead of once per decoded bit.
  debug_bits = ENV["LZMA_DEBUG_BITS"]
  trace_result = ENV["TRACE_LITERAL_DECODE"] && lit_state.zero?
  trace_first_bits = trace_result && debug_bits
  trace_96_bits = debug_bits && lit_state == 96

  symbol = 1
  while symbol < 0x100
    model_index = base_offset + symbol
    bit = range_decoder.decode_bit(models[model_index])

    if trace_first_bits || trace_96_bits
      # Peek at decoder internals for diagnostics only.
      range_after = range_decoder.instance_variable_get(:@range)
      code_after = range_decoder.instance_variable_get(:@code)
      if trace_first_bits
        puts "Bit #{symbol}: model_index=#{model_index}, bit=#{bit}, range=0x#{range_after.to_s(16)}, code=0x#{code_after.to_s(16)}"
      end
      if trace_96_bits
        puts " symbol=#{symbol}: model_index=#{model_index}, bit=#{bit}, range=0x#{range_after.to_s(16)}, code=0x#{code_after.to_s(16)}"
      end
    end

    symbol = (symbol << 1) | bit
  end

  # symbol is now 0x100-0x1FF; drop the leading marker bit
  result = symbol - 0x100

  if trace_result
    puts "Result: 0x#{result.to_s(16)} ('#{result.chr}')"
    puts "=== decode_unmatched END ==="
    puts ""
  end

  result
end
|
|
128
|
+
|
|
129
|
+
# Decode literal byte in matched mode (SDK feature)
|
|
130
|
+
#
|
|
131
|
+
# This mode uses a byte from the dictionary (the "match byte")
|
|
132
|
+
# as context for decoding the literal. The decoder must use
|
|
133
|
+
# the same probability model selection as the encoder.
|
|
134
|
+
#
|
|
135
|
+
# SDK algorithm (from LzmaDec.c):
|
|
136
|
+
# - Processes bits in pairs (match bit, literal bit)
|
|
137
|
+
# - Uses match bit to select probability model
|
|
138
|
+
# - Offset updates based on DECODED bit, not match bit (XZ Utils rc_matched_literal)
|
|
139
|
+
# - Switches to unmatched mode when bits diverge
|
|
140
|
+
#
|
|
141
|
+
# @param match_byte [Integer] Corresponding byte from dictionary
|
|
142
|
+
# @param lit_state [Integer] Literal context value (0-7 for lc=3, unshifted)
|
|
143
|
+
# @param lc [Integer] Literal context bits; lit_state is shifted left by this amount to locate the model block
|
|
144
|
+
# @param range_decoder [RangeDecoder] Range decoder instance
|
|
145
|
+
# @param models [Array<BitModel>] Literal probability models
|
|
146
|
+
# @return [Integer] Decoded byte value (0-255)
|
|
147
|
+
# Decode one literal byte using a dictionary byte as extra context.
#
# For every bit the match byte is shifted left by one and ANDed with
# +offset+ (initially 0x100), giving match_bit (either 0 or the current
# offset). The model used is base offset + offset + match_bit + symbol.
# After decoding, offset keeps match_bit when the decoded bit is 1 and
# drops it when the decoded bit is 0. Once the decoded bit differs from
# the match bit, the remaining bits are read by decode_unmatched_tail.
# This follows the XZ Utils rc_matched_literal pattern
# (src/liblzma/rangecoder/range_decoder.h).
#
# Diagnostics (optional, controlled by the environment):
# - TRACE_MATCHED_DECODE with lit_state == 96
# - DICT_FULL_233_TRACE with lit_state == 0
#
# @param match_byte [Integer] Corresponding byte from dictionary
# @param lit_state [Integer] Literal context value, before the lc shift
# @param lc [Integer] Shift applied to lit_state when locating the models
# @param range_decoder [RangeDecoder] Provides decode_bit(model)
# @param models [Array<BitModel>] Literal probability models
# @return [Integer] Decoded byte value (0-255)
def decode_matched(match_byte, lit_state, lc, range_decoder, models)
  base_offset = 3 * (lit_state << lc)
  symbol = 1
  # Start from the plain match byte (no 0x100 marker); offset selects the bit.
  match_sym = match_byte
  offset = 0x100

  # Resolve the trace switches once instead of once per decoded bit.
  trace_96 = ENV["TRACE_MATCHED_DECODE"] && lit_state == 96
  trace_233 = ENV.fetch("DICT_FULL_233_TRACE", nil) && lit_state.zero?

  if trace_96
    puts "=== MATCHED LITERAL DECODE: lit_state=#{lit_state}, match_byte=0x#{match_byte.to_s(16).upcase} ==="
    puts " base_offset=#{base_offset}"
    puts " Initial: symbol=#{symbol}, offset=0x#{offset.to_s(16).upcase}"
  end

  if trace_233
    puts "=== MATCHED LITERAL TRACE at dict_full=233 ==="
    puts " match_byte=0x#{match_byte.to_s(16).upcase}"
    puts " base_offset=#{base_offset}"
    puts " Initial: symbol=#{symbol}, offset=0x#{offset.to_s(16).upcase}"
  end

  # Only needed for the trace output, so only allocated when tracing.
  result_bits = trace_233 ? [] : nil
  bit_num = 0

  loop do
    if trace_233
      puts "\n Bit #{bit_num}:"
      puts " match_sym=0x#{(match_sym & 0xFF).to_s(16).upcase}, offset=0x#{offset.to_s(16).upcase}"
    end

    # Shift first, then extract: match_bit is 0 or the current offset value,
    # and is used as-is in both the model index and the offset update.
    match_sym <<= 1
    match_bit = match_sym & offset
    model_index = base_offset + offset + match_bit + symbol

    if trace_233
      puts " match_bit=0x#{match_bit.to_s(16).upcase}, symbol=#{symbol}"
      puts " model_index=#{model_index}"
      puts " offset_from_base=#{model_index - base_offset}"
      prob_before = models[model_index].probability
      puts " probability_before=0x#{prob_before.to_s(16).upcase} (#{prob_before})"
      rd_range_before = range_decoder.instance_variable_get(:@range)
      rd_code_before = range_decoder.instance_variable_get(:@code)
      puts " range_decoder BEFORE: range=0x#{rd_range_before.to_s(16)}, code=0x#{rd_code_before.to_s(16)}"
    end

    bit = range_decoder.decode_bit(models[model_index])

    if trace_233
      result_bits << bit
      prob_after = models[model_index].probability
      puts " decoded_bit=#{bit}"
      puts " probability_after=0x#{prob_after.to_s(16).upcase} (#{prob_after})"
      rd_range = range_decoder.instance_variable_get(:@range)
      rd_code = range_decoder.instance_variable_get(:@code)
      puts " range_decoder AFTER: range=0x#{rd_range.to_s(16)}, code=0x#{rd_code.to_s(16)}"
    end

    # Offset follows the DECODED bit: a 0 clears match_bit from offset,
    # a 1 keeps only match_bit. Either way a mismatch leaves offset at 0.
    if bit.zero?
      offset &= ~match_bit
      symbol <<= 1
    else
      offset &= match_bit
      symbol = (symbol << 1) | 1
    end

    if trace_96
      puts " new_offset=0x#{offset.to_s(16).upcase}"
      puts " new_symbol=#{symbol} (0x#{symbol.to_s(16).upcase})"
    end

    if (match_bit.positive? ? 1 : 0) != bit
      puts " *** BITS DIVERGE - switching to unmatched mode ***" if trace_96
      if trace_233
        puts " *** BITS DIVERGE at bit #{bit_num} - match_bit=#{match_bit.positive? ? 1 : 0}, decoded_bit=#{bit} ***"
      end

      # Divergence on the last bit: nothing left for the unmatched tail.
      break if symbol >= 0x100

      result = decode_unmatched_tail(symbol, base_offset, lc, range_decoder,
                                     models)
      if trace_233
        puts "\n FINAL RESULT (after unmatched tail): 0x#{result.to_s(16).upcase} ('#{result.chr}')"
        puts " Result bits: #{result_bits.join}"
        puts "=== END MATCHED LITERAL TRACE ===\n"
      end
      return result
    end

    break if symbol >= 0x100

    bit_num += 1
  end

  result = symbol - 0x100
  if trace_233 || trace_96
    puts "\n FINAL RESULT: 0x#{result.to_s(16).upcase} ('#{result.chr}')"
    puts " Result bits: #{result_bits.join}" if trace_233
    puts "=== END MATCHED LITERAL DECODE ===\n"
  end
  result
end
|
|
293
|
+
|
|
294
|
+
private
|
|
295
|
+
|
|
296
|
+
# Decode remaining bits in unmatched mode
|
|
297
|
+
#
|
|
298
|
+
# Called from matched mode when bits diverge.
|
|
299
|
+
# Similar to decode_unmatched but starts with partial symbol.
|
|
300
|
+
#
|
|
301
|
+
# @param symbol [Integer] Partial symbol value
|
|
302
|
+
# @param base_offset [Integer] Model base offset
|
|
303
|
+
# @param lc [Integer] Literal context bits
|
|
304
|
+
# @param range_decoder [RangeDecoder] Range decoder instance
|
|
305
|
+
# @param models [Array<BitModel>] Literal probability models
|
|
306
|
+
# @return [Integer] Decoded byte value (0-255)
|
|
307
|
+
# Finish a literal in unmatched mode, starting from a partial symbol.
#
# Keeps decoding bits with the model at base_offset + symbol and appending
# them to the symbol until it reaches 0x100, then strips that marker bit.
#
# @param symbol [Integer] Partial symbol value
# @param base_offset [Integer] Model base offset
# @param _lc [Integer] Literal context bits (not used here)
# @param range_decoder [RangeDecoder] Provides decode_bit(model)
# @param models [Array<BitModel>] Literal probability models
# @return [Integer] Decoded byte value (0-255)
def decode_unmatched_tail(symbol, base_offset, _lc, range_decoder,
                          models)
  partial = symbol
  until partial >= 0x100
    next_bit = range_decoder.decode_bit(models[base_offset + partial])
    partial = (partial << 1) | next_bit
  end
  partial - 0x100
end
|
|
317
|
+
end
|
|
318
|
+
end
|
|
319
|
+
end
|
|
320
|
+
end
|