omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,434 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "range_coder"
|
|
24
|
+
|
|
25
|
+
module Omnizip
|
|
26
|
+
module Algorithms
|
|
27
|
+
class LZMA < Algorithm
|
|
28
|
+
# Range decoder for LZMA decompression
|
|
29
|
+
#
|
|
30
|
+
# This class implements the decoding side of arithmetic coding
|
|
31
|
+
# using integer range arithmetic. It decodes bits from the
|
|
32
|
+
# compressed byte stream based on their probability models.
|
|
33
|
+
#
|
|
34
|
+
# The decoder mirrors the encoder's range subdivisions to
|
|
35
|
+
# extract the original bit values. It maintains a code value
|
|
36
|
+
# that represents the current position within the range.
|
|
37
|
+
class RangeDecoder < RangeCoder
|
|
38
|
+
attr_reader :code
|
|
39
|
+
|
|
40
|
+
# Initialize the range decoder
|
|
41
|
+
#
|
|
42
|
+
# @param input_stream [IO] The input stream of encoded bytes
|
|
43
|
+
def initialize(input_stream)
  # Bare `super` forwards input_stream unchanged to the RangeCoder parent.
  super
  # Code value exposed through `attr_reader :code`; starts at zero.
  @code = 0
  # Initialization bookkeeping, set up before init_decoder runs.
  # NOTE(review): 5 presumably matches the range coder's 5-byte preamble
  # (as in XZ Utils' rc_read_init) — confirm against init_decoder.
  @initialization_complete = false
  @init_bytes_remaining = 5
  init_decoder
end
|
|
50
|
+
|
|
51
|
+
# Update the input stream (for LZMA2 multi-chunk streams)
|
|
52
|
+
#
|
|
53
|
+
# When processing LZMA2 chunks, we need to update the stream
|
|
54
|
+
# reference for each new chunk while preserving the range decoder
|
|
55
|
+
# state (range, code) across chunks.
|
|
56
|
+
#
|
|
57
|
+
# XZ Utils pattern: The range coder uses a buffer pointer that's
|
|
58
|
+
# updated for each chunk, while rc_reset() resets range/code.
|
|
59
|
+
#
|
|
60
|
+
# @param new_stream [IO] New input stream
|
|
61
|
+
# @return [void]
|
|
62
|
+
def update_stream(new_stream)
  # Only the stream reference is replaced; no other instance state is
  # touched by this method.
  @stream = new_stream
end
|
|
65
|
+
|
|
66
|
+
# Decode a single bit using a probability model
|
|
67
|
+
#
|
|
68
|
+
# The range is split based on the bit's probability,
|
|
69
|
+
# and the code value determines which portion contains
|
|
70
|
+
# the actual bit value.
|
|
71
|
+
#
|
|
72
|
+
# XZ Utils pattern (rc_if_0): normalize BEFORE bound calculation
|
|
73
|
+
# See: XZ Utils, src/liblzma/rangecoder/range_decoder.h:181-184
|
|
74
|
+
#
|
|
75
|
+
# @param model [BitModel] The probability model for this bit
|
|
76
|
+
# @return [Integer] The decoded bit value (0 or 1)
|
|
77
|
+
def decode_bit(model)
|
|
78
|
+
# XZ Utils: rc_normalize FIRST, then calculate bound
|
|
79
|
+
normalize
|
|
80
|
+
bound = (@range >> 11) * model.probability
|
|
81
|
+
|
|
82
|
+
# DEBUG: Trace model updates to find probability corruption
|
|
83
|
+
trace_model_updates = ENV.fetch("TRACE_MODEL_UPDATES", nil)
|
|
84
|
+
prob_before = model.probability if trace_model_updates
|
|
85
|
+
|
|
86
|
+
# DEBUG: Trace is_rep bit decoding
|
|
87
|
+
trace_is_rep = ENV.fetch("TRACE_IS_REP_BITS", nil) && (bound > 1_000_000)
|
|
88
|
+
|
|
89
|
+
if trace_is_rep
|
|
90
|
+
puts " [RangeDecoder.decode_bit] BEFORE: range=#{@range}, code=#{@code}, bound=#{bound}, prob=#{model.probability}"
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# DEBUG: Trace model selection at dict_full=227
|
|
94
|
+
if ENV["TRACE_MODEL_SELECTION"]
|
|
95
|
+
begin
|
|
96
|
+
ObjectSpace.each_object(Omnizip::Algorithms::XzUtilsDecoder) do |decoder|
|
|
97
|
+
dict_full = decoder.instance_variable_get(:@dict_full)
|
|
98
|
+
if dict_full && dict_full >= 220 && dict_full <= 235
|
|
99
|
+
pos = decoder.instance_variable_get(:@pos)
|
|
100
|
+
state = decoder.instance_variable_get(:@state)
|
|
101
|
+
puts " [decode_bit] dict_full=#{dict_full}, pos=#{pos}, state=#{state}"
|
|
102
|
+
puts " [decode_bit] model.object_id=#{model.object_id}, prob=#{model.probability}"
|
|
103
|
+
puts " [decode_bit] range=0x#{@range.to_s(16)}, code=0x#{@code.to_s(16)}, bound=0x#{bound.to_s(16)}"
|
|
104
|
+
$stderr.flush
|
|
105
|
+
end
|
|
106
|
+
break
|
|
107
|
+
end
|
|
108
|
+
rescue StandardError => e
|
|
109
|
+
# Context not available
|
|
110
|
+
puts " [decode_bit] ERROR: #{e.message}"
|
|
111
|
+
$stderr.flush
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# DEBUG: Trace decode_bit for lit_state=96 literal decoding
|
|
116
|
+
if ENV["TRACE_DECODE_BIT_LIT96"]
|
|
117
|
+
puts " decode_bit: range=0x#{@range.to_s(16)}, code=0x#{@code.to_s(16)}, prob=#{model.probability}, bound=0x#{bound.to_s(16)}, code<bound?=#{@code < bound}"
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
# DEBUG: Trace decode_bit for specific problematic state
|
|
121
|
+
if ENV.fetch("TRACE_SPECIFIC_DECODE", nil) && @range == 0x40000000 && @code == 0x21407d82
|
|
122
|
+
puts " === CRITICAL DECODE_BIT (MATCHED LITERAL) ==="
|
|
123
|
+
puts " BEFORE: range=0x#{@range.to_s(16)} (#{@range})"
|
|
124
|
+
puts " BEFORE: code=0x#{@code.to_s(16)} (#{@code})"
|
|
125
|
+
puts " probability=#{model.probability}"
|
|
126
|
+
puts " bound=0x#{bound.to_s(16)} (#{bound})"
|
|
127
|
+
puts " range >> 11 = 0x#{(@range >> 11).to_s(16)} (#{@range >> 11})"
|
|
128
|
+
puts " (range >> 11) * probability = 0x#{((@range >> 11) * model.probability).to_s(16)} (#{(@range >> 11) * model.probability})"
|
|
129
|
+
puts " code < bound? #{@code < bound}"
|
|
130
|
+
puts " result should be: #{@code < bound ? 0 : 1}"
|
|
131
|
+
puts " =========================================="
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
# DEBUG: Trace decode_bit for model_index=257 (the problematic one)
|
|
135
|
+
if ENV["TRACE_DECODE_BIT_257"]
|
|
136
|
+
# We need to know which model is being used
|
|
137
|
+
# Unfortunately, we don't have direct access to the model_index here
|
|
138
|
+
puts " [decode_bit] range=0x#{@range.to_s(16)}, code=0x#{@code.to_s(16)}, prob=#{model.probability}, bound=0x#{bound.to_s(16)}, code<bound?=#{@code < bound}, result=#{@code < bound ? 0 : 1}"
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
if @code < bound
|
|
142
|
+
@range = bound
|
|
143
|
+
model.update(0)
|
|
144
|
+
if trace_model_updates && prob_before != model.probability
|
|
145
|
+
puts " [decode_bit] model UPDATE: #{prob_before} -> #{model.probability} (bit=0, object_id=#{model.object_id})"
|
|
146
|
+
end
|
|
147
|
+
if trace_is_rep
|
|
148
|
+
puts " [RangeDecoder.decode_bit] AFTER (bit=0): range=#{@range}, code=#{@code}"
|
|
149
|
+
end
|
|
150
|
+
0
|
|
151
|
+
else
|
|
152
|
+
@code -= bound
|
|
153
|
+
@range -= bound
|
|
154
|
+
model.update(1)
|
|
155
|
+
if trace_model_updates && prob_before != model.probability
|
|
156
|
+
puts " [decode_bit] model UPDATE: #{prob_before} -> #{model.probability} (bit=1, object_id=#{model.object_id})"
|
|
157
|
+
end
|
|
158
|
+
if trace_is_rep
|
|
159
|
+
puts " [RangeDecoder.decode_bit] AFTER (bit=1): range=#{@range}, code=#{@code}"
|
|
160
|
+
end
|
|
161
|
+
1
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
# Decode bits directly, without a probability model.
#
# Used for values with a uniform distribution, where every bit value is
# equally likely. For each bit the range is normalized and halved; the bit
# is 1 when the code lies in the upper half (in which case the code is
# rebased by subtracting the halved range) and 0 otherwise. Bits are
# produced most-significant first.
#
# The method emits no diagnostics; errors raised by normalize propagate to
# the caller unchanged.
#
# @param num_bits [Integer] Number of bits to decode
# @return [Integer] The decoded value
def decode_direct_bits(num_bits)
  result = 0

  num_bits.times do
    normalize
    @range >>= 1
    result <<= 1
    next if @code < @range

    @code -= @range
    result |= 1
  end

  result
end
|
|
221
|
+
|
|
222
|
+
# Decode bits directly, appending them to a starting value.
#
# Mirrors the rc_direct pattern of XZ Utils (liblzma
# rangecoder/range_decoder.h), which the original notes describe as being
# used for distance values in slots 14 and above. In C the macro adds 1
# unconditionally and then folds the sign bit of (code - range) back in;
# Ruby integers do not wrap around, so the comparison is made explicitly:
#
# - normalize, then halve the range
# - shift the accumulated value left by one
# - if code >= range the bit is 1: subtract range from code and set the
#   low bit of the accumulated value
# - otherwise the bit is 0 and the code is left untouched
#
# The method performs no tracing or environment lookups.
#
# @param num_bits [Integer] Number of bits to decode
# @param base [Integer] Base value to start from (2 or 3 for distances)
# @return [Integer] The decoded value
def decode_direct_bits_with_base(num_bits, base)
  result = base

  num_bits.times do
    normalize
    @range >>= 1
    result <<= 1
    next if @code < @range

    @code -= @range
    result += 1
  end

  result
end
|
|
276
|
+
|
|
277
|
+
# Put the range decoder back into its pre-initialization state.
#
# Follows the rc_reset() approach of XZ Utils:
# - range becomes 0xFFFFFFFF
# - code becomes 0
# - five initialization bytes are marked as outstanding; nothing is read
#   here, normalize picks them up lazily on the next decode call
#
# According to the original notes this is invoked on an LZMA2 state reset
# (control >= 0xA0) before the new chunk's compressed data is decoded.
#
# When the LZMA_DEBUG environment variable is set, the state before and
# after the reset is reported on stderr.
#
# @return [void]
def reset
  debug = ENV["LZMA_DEBUG"]

  # Best-effort stream position; not every stream supports #pos.
  stream_pos = lambda do
    begin
      @stream.pos
    rescue StandardError
      "N/A"
    end
  end

  if debug
    before = stream_pos.call
    warn " RangeDecoder.reset: BEFORE reset, range=0x#{@range.to_s(16)}, code=0x#{@code.to_s(16)}, stream.pos=#{before}, init_bytes_remaining=#{@init_bytes_remaining}"
  end

  @range = 0xFFFFFFFF
  @code = 0
  # Lazy initialization: only record how many bytes are owed.
  @init_bytes_remaining = 5
  return unless debug

  after = stream_pos.call
  warn " RangeDecoder.reset: AFTER reset, code=0x#{@code.to_s(16)}, stream.pos=#{after}, init_bytes_remaining=#{@init_bytes_remaining}"
end
|
|
314
|
+
|
|
315
|
+
# Feed pending initialization bytes into the code register and, if the
# range has become too small, shift in one more byte.
#
# Two steps, in this order:
#
# 1. Lazy initialization. While @init_bytes_remaining is positive, bytes
#    are read straight from the stream and shifted into @code (kept to 32
#    bits). All outstanding bytes are consumed in one call, because the
#    decode methods call normalize only once per bit. A stream that ends
#    early contributes zero bytes here instead of raising.
# 2. Range normalization. If @range is below TOP, exactly one byte is
#    obtained via read_byte, the range is scaled up by 256 and the byte is
#    shifted into @code. This step uses `if`, not a loop, so at most one
#    byte is shifted in per call (the rc_normalize pattern of XZ Utils).
#
# When the RANGE_DECODER_TRACE environment variable is set, both steps are
# traced on stderr. The variable is only consulted when a byte is actually
# consumed, and stream position/size are only probed while tracing.
#
# @return [void]
def normalize
  if @init_bytes_remaining.positive?
    tracing = ENV["RANGE_DECODER_TRACE"]

    if tracing
      # Best-effort probes; not every stream supports #pos / #size.
      stream_pos_before = begin
        @stream.pos
      rescue StandardError
        "N/A"
      end
      stream_size = begin
        @stream.size
      rescue StandardError
        "N/A"
      end
    end

    while @init_bytes_remaining.positive?
      byte = @stream.getbyte || 0
      code_before = @code
      @code = ((code_before << 8) | byte) & 0xFFFFFFFF
      @init_bytes_remaining -= 1
      next unless tracing

      warn "\n=== RangeDecoder.normalize (init_bytes_remaining=#{@init_bytes_remaining + 1}) ==="
      warn " stream_pos_before=#{stream_pos_before}, stream_size=#{stream_size}"
      warn " byte=0x#{byte.to_s(16).upcase}, code_before=0x#{code_before.to_s(16).upcase}"
      warn " (code_before << 8) = 0x#{(code_before << 8).to_s(16).upcase}"
      warn " ((code_before << 8) | byte) = 0x#{((code_before << 8) | byte).to_s(16).upcase}"
      warn " code_after=0x#{@code.to_s(16).upcase}"
    end
  end

  return unless @range < TOP

  byte = read_byte
  @range <<= 8
  @code = ((@code << 8) | byte) & 0xFFFFFFFF
  return unless ENV["RANGE_DECODER_TRACE"]

  pos = begin
    @stream.pos
  rescue StandardError
    "N/A"
  end
  warn " NORMALIZE: pos=#{pos}, byte=0x#{byte.to_s(16).upcase}, code=0x#{@code.to_s(16).upcase}, range=0x#{@range.to_s(16).upcase}"
  $stderr.flush
  nil
end
|
|
380
|
+
|
|
381
|
+
private
|
|
382
|
+
|
|
383
|
+
# Eagerly load the first five bytes of the stream into the code register.
#
# Corresponds to rc_read_init in XZ Utils: each byte is shifted into @code,
# which is masked to 32 bits because Ruby integers do not wrap like a C
# uint32_t. The outstanding-init-bytes counter is decremented alongside
# (never below zero), and the decoder is flagged as initialized afterwards.
#
# @return [void]
def init_decoder
  5.times do
    incoming = read_byte
    @code = ((@code << 8) | incoming) & 0xFFFFFFFF
    @init_bytes_remaining -= 1 if @init_bytes_remaining > 0
  end

  @initialization_complete = true
end
|
|
398
|
+
|
|
399
|
+
# Read a single byte from the input stream.
#
# While the decoder is still initializing (the initialization flag is not
# set, or initialization bytes are outstanding) a missing byte is reported
# as 0. Once normal decoding is underway, running out of input means the
# compressed stream ended prematurely and an error is raised.
#
# Data bytes can be traced on stderr through the RANGE_DECODER_TRACE and
# LZMA_DEBUG environment variables. The stream position is only probed
# when one of them is set, so the common path does no extra work per byte.
#
# @return [Integer] The byte value (0-255)
# @raise [Omnizip::DecompressionError] If stream is exhausted during normal decoding
def read_byte
  byte = @stream.getbyte
  decoding = @initialization_complete && @init_bytes_remaining.zero?

  # Initialization phase: pad a short stream with zero bytes.
  return byte || 0 unless decoding

  if byte.nil?
    raise Omnizip::DecompressionError,
          "LZMA compressed data exhausted prematurely. The file may be corrupted or the uncompressed size field may be incorrect."
  end

  range_trace = ENV["RANGE_DECODER_TRACE"]
  lzma_debug = ENV["LZMA_DEBUG"]
  return byte unless range_trace || lzma_debug

  # Best-effort position; not every stream supports #pos.
  pos = begin
    @stream.pos
  rescue StandardError
    "N/A"
  end

  if range_trace
    warn " READ_BYTE: pos=#{pos.inspect}, byte=0x#{byte.to_s(16).upcase}"
    $stderr.flush
  end
  if lzma_debug
    warn " READ_BYTE: pos=#{pos.inspect}, byte=0x#{byte.to_s(16).upcase}, @code now=0x#{@code.to_s(16)}"
  end

  byte
end
|
|
431
|
+
end
|
|
432
|
+
end
|
|
433
|
+
end
|
|
434
|
+
end
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
# Ported from XZ Utils src/liblzma/rangecoder/range_encoder.h
|
|
24
|
+
# Direct port of the reference implementation for byte-for-byte compatibility.
|
|
25
|
+
|
|
26
|
+
require_relative "range_coder"
|
|
27
|
+
|
|
28
|
+
module Omnizip
|
|
29
|
+
module Algorithms
|
|
30
|
+
class LZMA < Algorithm
|
|
31
|
+
# Range encoder for LZMA compression
|
|
32
|
+
#
|
|
33
|
+
# This is a direct port of XZ Utils' range encoder implementation
|
|
34
|
+
# for guaranteed byte-for-byte compatibility.
|
|
35
|
+
#
|
|
36
|
+
# The encoder maintains a range [low, low+range) and subdivides
|
|
37
|
+
# it proportionally based on symbol probabilities.
|
|
38
|
+
class RangeEncoder < RangeCoder
|
|
39
|
+
# Initialize the range encoder.
#
# Sets up the carry-propagation cache used by shift_low and clears the
# recorded pre-flush position.
#
# @pre_flush_pos starts out as nil, not 0: bytes_for_decode evaluates
# `@pre_flush_pos || @stream.pos`, and since 0 is truthy in Ruby a numeric
# initial value would make that fallback unreachable. flush assigns the
# real position later.
#
# @param output_stream [IO] The output stream for encoded bytes
def initialize(output_stream)
  super
  @cache = 0
  @cache_size = 1 # XZ Utils initializes to 1, not 0
  @pre_flush_pos = nil # set by flush
end
|
|
48
|
+
|
|
49
|
+
# Encode a single bit using an adaptive probability model.
#
# Ported from the RC_BIT_0 / RC_BIT_1 cases of rc_encode() in XZ Utils.
# Normalization happens BEFORE the bit is encoded. The range is split at
# bound = (range >> 11) * probability:
#
# - bit 0 keeps the lower part: range = bound
# - bit 1 keeps the upper part: low += bound, range -= bound
#
# Ruby integers have arbitrary precision, so results are masked explicitly
# to emulate the C types (32 bits for range, 64 bits for low, which may
# temporarily carry past 32 bits until shift_low handles it).
#
# The model is updated with the encoded bit afterwards, mirroring what the
# decoder does. This method runs once per encoded bit and performs no
# tracing or environment lookups.
#
# @param model [BitModel] The probability model for this bit
# @param bit [Integer] The bit value (0 or 1)
# @return [void]
def encode_bit(model, bit)
  normalize
  bound = (@range >> 11) * model.probability

  if bit.zero?
    @range = bound & 0xFFFFFFFF
  else
    @low = (@low + bound) & 0xFFFFFFFFFFFFFFFF
    @range = (@range - bound) & 0xFFFFFFFF
  end

  model.update(bit)
end
|
|
94
|
+
|
|
95
|
+
# Encode the lowest +num_bits+ bits of +value+ without a probability model.
#
# Intended for uniformly distributed values. Bits are emitted from the
# most significant of the requested bits down to bit 0. For every bit the
# range is normalized and halved; a 1 bit additionally moves low up by the
# halved range. Masks emulate 32-bit (range) and 64-bit (low) unsigned
# arithmetic.
#
# @param value [Integer] The value to encode
# @param num_bits [Integer] Number of bits to encode
# @return [void]
def encode_direct_bits(value, num_bits)
  num_bits.downto(1) do |position|
    normalize
    @range = (@range >> 1) & 0xFFFFFFFF

    next unless (value >> (position - 1)) & 1 == 1

    @low = (@low + @range) & 0xFFFFFFFFFFFFFFFF
  end
end
|
|
111
|
+
|
|
112
|
+
# Write out the bytes still held in the encoder.
#
# Ported from rc_flush() in XZ Utils. The stream position is recorded
# first, so that bytes_for_decode can report the size without the flush
# bytes (needed for LZMA2 according to the original notes). The range is
# then forced to its maximum so no further normalization is triggered,
# and shift_low is run five times to push the remaining bytes out.
#
# @return [void]
def flush
  # Remember where the payload ended before the flush bytes are appended.
  @pre_flush_pos = @stream.pos

  # A full range keeps normalize from shifting anything else out.
  @range = 0xFFFFFFFF

  5.times do
    shift_low
  end
end
|
|
128
|
+
|
|
129
|
+
# Report how many output bytes the decoder needs.
#
# Returns the position recorded in @pre_flush_pos when that value is
# truthy, i.e. the stream position captured by flush before the flush
# bytes were written. Note that every Integer, including 0, is truthy in
# Ruby, so the current stream position is only used as a fallback while
# @pre_flush_pos is nil (or false).
#
# @return [Integer] Number of bytes decoder will consume
def bytes_for_decode
  return @pre_flush_pos if @pre_flush_pos

  @stream.pos
end
|
|
138
|
+
|
|
139
|
+
protected
|
|
140
|
+
|
|
141
|
+
# Bring the range back up to at least TOP.
#
# Ported from the normalization step of rc_encode() in XZ Utils. For as
# long as the range is below TOP, one byte of low is shifted out via
# shift_low and the range is scaled up by 256.
# IMPORTANT: shift_low runs BEFORE the range is shifted.
#
# @return [void]
def normalize
  until @range >= TOP
    shift_low
    @range <<= 8
  end
end
|
|
153
|
+
|
|
154
|
+
private
|
|
155
|
+
|
|
156
|
+
# Move the top byte of 'low' towards the output stream.
#
# Port of rc_shift_low() from XZ Utils (range_encoder.h). A byte cannot
# always be written immediately, because a later carry out of bit 32 of
# low may still have to be added to it. The most recent byte is therefore
# held in @cache, and @cache_size counts that byte plus any bytes whose
# value is still pending.
#
# Pending bytes are written when either a carry is present or the low 32
# bits are below 0xFF000000: first the cached byte plus the carry, then
# 0xFF plus the carry for every remaining pending byte. Afterwards the top
# byte of the low 32 bits becomes the new cached byte. In all cases one
# more byte is counted as pending and low is reduced to its lower 24 bits,
# shifted left by 8.
#
# @return [void]
def shift_low
  carry = (@low >> 32) & 0xFF
  low_word = @low & 0xFFFFFFFF

  if carry.nonzero? || low_word < 0xFF000000
    # do { out = cache + carry; cache = 0xFF; } while (--cache_size != 0);
    loop do
      @stream.putc((@cache + carry) & 0xFF)
      @cache = 0xFF
      break if (@cache_size -= 1).zero?
    end

    # The top byte of the low word is the next byte to hold back.
    @cache = (low_word >> 24) & 0xFF
  end

  @cache_size += 1
  @low = (@low & 0x00FFFFFF) << 8
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
end
|