omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,723 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../../../error"
|
|
24
|
+
require_relative "../../../algorithms/lzma2/constants"
|
|
25
|
+
require_relative "../../../algorithms/lzma2/properties"
|
|
26
|
+
require_relative "../../../algorithms/lzma/decoder"
|
|
27
|
+
require_relative "../../../algorithms/lzma/xz_utils_decoder"
|
|
28
|
+
|
|
29
|
+
module Omnizip
|
|
30
|
+
module Implementations
|
|
31
|
+
module XZUtils
|
|
32
|
+
module LZMA2
|
|
33
|
+
# XZ Utils LZMA2 decoder implementation.
|
|
34
|
+
#
|
|
35
|
+
# This is the original Decoder moved from algorithms/lzma2/decoder.rb
|
|
36
|
+
# to the new namespace structure.
|
|
37
|
+
class Decoder
|
|
38
|
+
include Omnizip::Algorithms::LZMA2Const
|
|
39
|
+
|
|
40
|
+
attr_reader :dict_size
|
|
41
|
+
|
|
42
|
+
# Initialize the decoder
|
|
43
|
+
#
|
|
44
|
+
# @param input [IO] Input stream of compressed data
|
|
45
|
+
# @param options [Hash] Decoding options
|
|
46
|
+
# @option options [Boolean] :raw_mode If true, skip property byte reading (for XZ format)
|
|
47
|
+
# @option options [Integer] :dict_size Dictionary size to use in raw_mode (defaults to 8 MiB when omitted)
|
|
48
|
+
def initialize(input, options = {})
  @input = input
  @options = options
  @raw_mode = options[:raw_mode] || false

  # Outside raw mode the remaining setup is delegated to read_property_byte.
  return read_property_byte unless @raw_mode

  # Raw mode (XZ format): the property byte is provided by the caller, so
  # only the dictionary size is taken from the options. When no :dict_size
  # is given, fall back to 8 MiB.
  default_dict_size = 8 * 1024 * 1024
  @dict_size = options[:dict_size] || default_dict_size
  @properties = Omnizip::Algorithms::LZMA2::Properties.new(@dict_size)
end
|
|
62
|
+
|
|
63
|
+
# Decode a compressed stream
|
|
64
|
+
#
|
|
65
|
+
# XZ Utils pattern (lzma2_decoder.c):
|
|
66
|
+
# - LZMA decoder is created ONCE and reused across all chunks
|
|
67
|
+
# - State (dictionary, probability models) persists between chunks
|
|
68
|
+
# - Reset only when control byte indicates new properties (control >= 0xC0)
|
|
69
|
+
#
|
|
70
|
+
# @return [String] Decompressed data
|
|
71
|
+
def decode_stream
  # Reads LZMA2 chunks from @input until the end marker (CONTROL_END) and
  # returns the concatenated decoded bytes as a binary (ASCII-8BIT) String.
  #
  # The control-byte bookkeeping follows XZ Utils' lzma2_decoder.c:
  # - the first chunk must reset the dictionary (control >= 0xE0 or the
  #   "uncompressed + dictionary reset" control byte);
  # - after a dictionary reset, the next LZMA chunk must carry new
  #   properties (control >= 0xC0).
  #
  # Raises Omnizip::FormatError when either rule is violated.
  debug = ENV["LZMA2_DEBUG"]
  warn "DEBUG: decode_stream - starting..." if debug

  # Per-stream state. The LZMA decoder is created lazily by the first
  # compressed chunk and then reused for every following chunk.
  @lzma_decoder = nil
  # Drop uncompressed data left pending by an earlier call so it can never
  # be preloaded into this stream's dictionary.
  @uncompressed_buffer = nil
  @need_properties = true
  @need_dictionary_reset = true

  output = []
  chunk_num = 0

  loop do
    control = read_control_byte
    warn "DEBUG: decode_stream - chunk ##{chunk_num}, control=0x#{control.to_s(16)}" if debug

    break if control == CONTROL_END

    if control >= 0xE0 || control == CONTROL_UNCOMPRESSED_RESET
      # A dictionary reset implies the next LZMA chunk must set properties.
      @need_properties = true
      @need_dictionary_reset = true
    elsif @need_dictionary_reset
      raise Omnizip::FormatError,
            "LZMA2 dictionary reset required but not performed (control=0x#{control.to_s(16).upcase})"
    end

    # Reaching this point means any required reset is carried by this chunk;
    # the reset itself is applied further down when the chunk is decoded.
    @need_dictionary_reset = false

    if control >= 0x80
      if control >= 0xC0
        # The chunk carries a properties byte (read while decoding the chunk).
        @need_properties = false
      elsif @need_properties
        raise Omnizip::FormatError,
              "LZMA2 properties required but not provided (control=0x#{control.to_s(16).upcase})"
      end
    end

    chunk_data = decode_chunk(control, chunk_num)
    warn "DEBUG: decode_stream - chunk ##{chunk_num} produced #{chunk_data.bytesize} bytes" if debug

    # Every valid chunk contributes output, including uncompressed chunks
    # that reset the dictionary.
    output << chunk_data
    chunk_num += 1
  end

  if debug
    total_size = output.sum(&:bytesize)
    warn "DEBUG: decode_stream - finished, total chunks=#{chunk_num}, total_size=#{total_size}"
  end

  output.join.force_encoding("ASCII-8BIT")
end
|
|
164
|
+
|
|
165
|
+
private
|
|
166
|
+
|
|
167
|
+
# Read and parse LZMA2 property byte
|
|
168
|
+
#
|
|
169
|
+
# @return [void]
|
|
170
|
+
# @raise [Omnizip::FormatError] If property byte is invalid
|
|
171
|
+
def read_property_byte
  # Consumes one byte from @input and derives @properties and @dict_size
  # from it. A missing byte is reported as a format error.
  header = @input.getbyte
  if header.nil?
    raise Omnizip::FormatError, "Invalid LZMA2 header"
  end

  parsed = Omnizip::Algorithms::LZMA2::Properties.from_byte(header)
  @properties = parsed
  @dict_size = parsed.actual_dict_size
end
|
|
178
|
+
|
|
179
|
+
# Read control byte
|
|
180
|
+
#
|
|
181
|
+
# @return [Integer] Control byte value
|
|
182
|
+
# @raise [Omnizip::IOError] If stream ends unexpectedly
|
|
183
|
+
def read_control_byte
  # Returns the next byte of @input as an Integer; running out of input
  # where a control byte is expected is an I/O error.
  @input.getbyte || raise(Omnizip::IOError, "Unexpected end of stream")
end
|
|
189
|
+
|
|
190
|
+
# Decode chunk based on control byte
|
|
191
|
+
#
|
|
192
|
+
# XZ Utils pattern (lzma2_decoder.c:75-102):
|
|
193
|
+
# - control >= 0xE0 or control == 1: Dictionary reset + properties needed
|
|
194
|
+
# - control >= 0xC0: State reset + properties
|
|
195
|
+
# - control >= 0xA0: State reset only
|
|
196
|
+
# - control >= 0x80: LZMA chunk (no reset)
|
|
197
|
+
# - control == 0x01 or 0x02: Uncompressed chunk
|
|
198
|
+
# - control > 2 and < 0x80: INVALID (LZMA2_DATA_ERROR)
|
|
199
|
+
#
|
|
200
|
+
# @param control [Integer] Control byte
|
|
201
|
+
# @param chunk_num [Integer] Chunk sequence number
|
|
202
|
+
# @return [String] Decoded chunk data
|
|
203
|
+
def decode_chunk(control, chunk_num)
  # Validates the control byte and hands the chunk to the uncompressed or
  # compressed chunk reader, returning whatever that reader returns.
  if ENV["LZMA2_DEBUG"]
    offset = @input.respond_to?(:pos) ? @input.pos : "N/A"
    warn "DEBUG: decode_chunk - chunk=#{chunk_num}, control=0x#{control.to_s(16)}, pos=#{offset}"
  end

  # 0x03..0x7F is not assigned to any chunk type.
  if (3..0x7F).cover?(control)
    raise Omnizip::FormatError,
          "Invalid LZMA2 control byte: 0x#{control.to_s(16).upcase} " \
          "(valid ranges: 0x00-0x02, 0x80-0xFF)"
  end

  return decode_uncompressed_chunk(control) if uncompressed_chunk?(control)

  decode_compressed_chunk(control, chunk_num)
end
|
|
224
|
+
|
|
225
|
+
# Check if control byte indicates uncompressed chunk
|
|
226
|
+
#
|
|
227
|
+
# @param control [Integer] Control byte
|
|
228
|
+
# @return [Boolean] True if uncompressed
|
|
229
|
+
def uncompressed_chunk?(control)
  # True only for the two control values that mark uncompressed chunks.
  case control
  when CONTROL_UNCOMPRESSED_RESET, CONTROL_UNCOMPRESSED then true
  else false
  end
end
|
|
233
|
+
|
|
234
|
+
# Decode uncompressed chunk
|
|
235
|
+
#
|
|
236
|
+
# XZ Utils pattern (lzma2_decoder.c:193-200):
|
|
237
|
+
# - Copy from input to the dictionary as is using dict_write()
|
|
238
|
+
# - This ensures subsequent compressed chunks can reference the data
|
|
239
|
+
# - If LZMA decoder exists, add data to dictionary directly
|
|
240
|
+
# - If LZMA decoder doesn't exist, store data in @uncompressed_buffer
|
|
241
|
+
#
|
|
242
|
+
# @param control [Integer] Control byte
|
|
243
|
+
# @return [String] Uncompressed data
|
|
244
|
+
def decode_uncompressed_chunk(_control)
  # Reads one uncompressed chunk: a 2-byte big-endian (size - 1) field
  # followed by that many raw bytes. The bytes are returned and also made
  # available to the LZMA dictionary, either directly through the existing
  # decoder or via @uncompressed_buffer until a decoder is created.
  tracing = ENV["LZMA2_DEBUG"]
  locate = -> { @input.respond_to?(:pos) ? @input.pos : "N/A" }

  size = read_size_bytes(2) + 1
  warn "DEBUG: decode_uncompressed_chunk - size=#{size}, pos_before=#{locate.call}" if tracing

  data = @input.read(size)

  if tracing
    warn "DEBUG: decode_uncompressed_chunk - expected=#{size}, actual=#{data&.bytesize || 0}, pos_after=#{locate.call}"
    warn "DEBUG: decode_uncompressed_chunk - data_nil=#{data.nil?}"
  end

  # A short or missing read means the stream was truncated mid-chunk.
  raise Omnizip::IOError, "Unexpected end of stream" unless data && data.bytesize == size

  if @lzma_decoder
    @lzma_decoder.add_to_dictionary(data)
    warn "DEBUG: decode_uncompressed_chunk - Added #{data.bytesize} bytes to LZMA decoder's dictionary" if tracing
  else
    # No decoder yet: queue the bytes so they can be preloaded later.
    pending = (@uncompressed_buffer ||= String.new(encoding: "ASCII-8BIT"))
    pending << data
    warn "DEBUG: decode_uncompressed_chunk - Stored #{data.bytesize} bytes in uncompressed_buffer (total #{pending.bytesize} bytes)" if tracing
  end

  data
end
|
|
291
|
+
|
|
292
|
+
# Decode a compressed (LZMA) chunk.
#
# Layout of the chunk after its control byte, as parsed below:
# - 2 bytes, big-endian: low 16 bits of (uncompressed size - 1); the high
#   5 bits are bits 4-0 of the control byte
# - 2 bytes, big-endian: (compressed size - 1)
# - 1 byte: LZMA lc/lp/pb properties, present only when control >= 0xC0
# - the compressed payload
#
# Control bytes 0x03-0x7F are rejected in decode_chunk and 0x01/0x02 are
# routed to decode_uncompressed_chunk, so only control >= 0x80 is expected
# here; anything lower is reported as a format error.
#
# @param control [Integer] Control byte
# @param chunk_num [Integer] Chunk sequence number
# @return [String] Decompressed data
# @raise [Omnizip::FormatError] If control is below 0x80
# @raise [Omnizip::IOError] If the stream ends inside the chunk
def decode_compressed_chunk(control, chunk_num)
  if control < 0x80
    raise Omnizip::FormatError,
          "Invalid LZMA2 control byte: 0x#{control.to_s(16).upcase} " \
          "(control < 0x80 but not 0x01 or 0x02)"
  end

  debug = ENV["LZMA2_DEBUG"]

  # read_size_bytes raises Omnizip::IOError on a truncated header instead of
  # failing later on a nil byte.
  uncompressed_size = ((control & 0x1F) << 16) + read_size_bytes(2) + 1
  compressed_size = read_size_bytes(2) + 1

  # Without a properties byte, nil is passed on and the chunk decoder
  # decides what to use.
  properties = nil
  if control >= 0xC0
    properties = @input.getbyte
    raise Omnizip::IOError, "Unexpected end of stream" if properties.nil?
  end

  if debug
    pos_before = @input.respond_to?(:pos) ? @input.pos : "N/A"
    warn "DEBUG: decode_compressed_chunk - control=0x#{control.to_s(16)}"
    warn "DEBUG: decode_compressed_chunk - uncompressed=#{uncompressed_size}, compressed=#{compressed_size}, properties=#{properties&.to_s(16)}, pos_before=#{pos_before}"
    warn "DEBUG: @input.respond_to?(:pos)=#{@input.respond_to?(:pos)}, @input.class=#{@input.class}"
  end

  compressed_data = @input.read(compressed_size)
  actual_size = compressed_data&.bytesize || 0
  warn "DEBUG: decode_compressed_chunk - expected=#{compressed_size}, actual=#{actual_size}" if debug

  # Check for truncation before touching the payload, so a short read is
  # always reported as an IOError (also when debugging is enabled).
  if actual_size != compressed_size
    warn "DEBUG: decode_compressed_chunk - FAILED - expected=#{compressed_size}, actual=#{actual_size}" if debug
    raise Omnizip::IOError, "Unexpected end of stream"
  end

  if debug
    hex = compressed_data.bytes.map { |b| "0x#{b.to_s(16).rjust(2, '0')}" }.join(" ")
    warn "DEBUG: compressed_data hex: #{hex}"
  end

  decompress_lzma_chunk(compressed_data, uncompressed_size, properties,
                        control, chunk_num)
end
|
|
397
|
+
|
|
398
|
+
# Decompress LZMA chunk
|
|
399
|
+
#
|
|
400
|
+
# XZ Utils pattern (lzma2_decoder.c:92-103, 154-191):
|
|
401
|
+
# - Create LZMA decoder on first chunk or when control >= 0xC0
|
|
402
|
+
# - Call decoder.reset() when new properties are present (control >= 0xC0)
|
|
403
|
+
# - Reuse decoder state across chunks (preserves probability models)
|
|
404
|
+
# - Reset range decoder between chunks (lzma_decoder.c:1014-1017)
|
|
405
|
+
#
|
|
406
|
+
# @param compressed_data [String] Compressed data (no LZMA header)
|
|
407
|
+
# @param expected_size [Integer] Expected decompressed size (from LZMA2 chunk header)
|
|
408
|
+
# @param properties [Integer, nil] LZMA properties byte from LZMA2 chunk (if present)
|
|
409
|
+
# @param control [Integer] LZMA2 control byte for this chunk
|
|
410
|
+
# @param chunk_num [Integer] Chunk sequence number
|
|
411
|
+
# @return [String] Decompressed data
|
|
412
|
+
def decompress_lzma_chunk(compressed_data, expected_size, properties,
                          control, chunk_num)
  # Decodes the payload of one LZMA2 compressed chunk.
  #
  # The LZMA decoder is created by the first compressed chunk and reused
  # afterwards, so dictionary contents and probability models carry over
  # between chunks. What gets reset depends on the control byte:
  # - control >= 0xE0: dictionary reset + state reset + new properties
  # - control >= 0xC0: state reset + new properties (dictionary kept)
  # - control >= 0xA0: state reset only
  # - control >= 0x80: nothing is reset
  # (control 0x01, the uncompressed chunk with dictionary reset, never
  # reaches this method; it is still part of the dictionary-reset test so
  # the rule reads the same as in decode_stream.)
  #
  # Raises Omnizip::FormatError for an lc/lp/pb byte that LZMA2 does not
  # allow, and Omnizip::DecompressionError when the decoder returns a
  # different number of bytes than the chunk header announced.
  debug = ENV["LZMA2_DEBUG"]

  if debug
    warn "DEBUG: decompress_lzma_chunk - expected_size=#{expected_size}, compressed_size=#{compressed_data.bytesize}, properties=#{properties&.to_s(16)}"
    warn "DEBUG: @expected_uncompressed_size=#{@expected_uncompressed_size}" if defined?(@expected_uncompressed_size)
  end

  # A nil size means "unknown": the decoder is given the all-ones sentinel.
  lzma_uncompressed_size = expected_size || 0xFFFFFFFFFFFFFFFF

  if properties && properties >= 0
    # Same acceptance rule as XZ Utils' lzma_lzma_lclppb_decode: the byte
    # must not exceed (4 * 5 + 4) * 9 + 8 and lc + lp must not exceed 4.
    if properties > (((4 * 5) + 4) * 9) + 8
      raise Omnizip::FormatError,
            "Invalid LZMA2 properties byte: 0x#{properties.to_s(16).upcase}"
    end

    # properties = (pb * 9 * 5) + (lp * 9) + lc
    pb, remainder = properties.divmod(9 * 5)
    lp, lc = remainder.divmod(9)

    if lc + lp > 4
      raise Omnizip::FormatError,
            "Invalid LZMA2 properties byte: 0x#{properties.to_s(16).upcase} (lc + lp > 4)"
    end
  else
    # No properties byte in this chunk: lc=3, lp=0, pb=2. These values are
    # only consumed when a decoder has to be created below.
    lc = 3
    lp = 0
    pb = 2
  end

  warn "DEBUG: decompress_lzma_chunk - lc=#{lc}, lp=#{lp}, pb=#{pb}, properties=#{properties&.to_s(16)}" if debug

  # The chunk payload has no LZMA header; it is handed to the decoder as is.
  input_buffer = StringIO.new(compressed_data)
  input_buffer.set_encoding("ASCII-8BIT")

  dictionary_reset = control >= 0xE0 || control == 1

  if chunk_num.zero? || !@lzma_decoder
    if debug
      warn "DEBUG: input_buffer created, pos=#{input_buffer.pos}, size=#{compressed_data.bytesize}"
      head = compressed_data[0..20].bytes.map { |b| b.to_s(16).rjust(2, "0") }.join(" ")
      warn "DEBUG: compressed_data bytes (first 20): #{head}"
    end

    # Bytes from earlier uncompressed chunks must be in the dictionary before
    # decoding starts, so they are passed to the new decoder as preload.
    preloaded_data = @uncompressed_buffer if @uncompressed_buffer && !@uncompressed_buffer.empty?

    @lzma_decoder = Omnizip::Algorithms::XzUtilsDecoder.new(input_buffer,
                                                            lzma2_mode: true,
                                                            lc: lc,
                                                            lp: lp,
                                                            pb: pb,
                                                            dict_size: @dict_size,
                                                            uncompressed_size: lzma_uncompressed_size,
                                                            preloaded_data: preloaded_data)

    @uncompressed_buffer = nil if preloaded_data

    if debug
      warn "DEBUG: decompress_lzma_chunk - Created new LZMA decoder (lzma2_mode)#{" with #{preloaded_data.bytesize} bytes of preloaded data" if preloaded_data}"
    end
  else
    preserve_dict = !dictionary_reset

    if control >= 0xC0
      # New properties: reset the decoder state with the new lc/lp/pb.
      @lzma_decoder.reset(new_lc: lc, new_lp: lp, new_pb: pb,
                          preserve_dict: preserve_dict)
      warn "DEBUG: decompress_lzma_chunk - Reset LZMA decoder with new properties (preserve_dict=#{preserve_dict})" if debug
    elsif control >= 0xA0
      # State reset only; the current properties stay in effect.
      if debug
        dict_full = @lzma_decoder.instance_variable_get(:@dict_full)
        warn "DEBUG: decompress_lzma_chunk - Calling reset with preserved dict (control=#{control}, dict_full=#{dict_full})"
      end
      @lzma_decoder.reset(preserve_dict: preserve_dict)
    end

    @lzma_decoder.set_input(input_buffer)

    if debug && control >= 0xA0 && control < 0xC0
      warn "DEBUG: decompress_lzma_chunk - After set_input, checking range_decoder..."
      if @lzma_decoder.instance_variable_defined?(:@range_decoder)
        range_decoder = @lzma_decoder.instance_variable_get(:@range_decoder)
        if range_decoder
          warn "  range_decoder exists: code=0x#{range_decoder.instance_variable_get(:@code).to_s(16)}, range=0x#{range_decoder.instance_variable_get(:@range).to_s(16)}, init_bytes_remaining=#{range_decoder.instance_variable_get(:@init_bytes_remaining)}"
        else
          warn "  range_decoder is nil"
        end
      else
        warn "  @range_decoder not defined yet"
      end
    end

    # Each chunk announces its own uncompressed size.
    @lzma_decoder.set_uncompressed_size(lzma_uncompressed_size,
                                        allow_eopm: false)

    warn "DEBUG: decompress_lzma_chunk - Reusing LZMA decoder, set uncompressed_size=#{lzma_uncompressed_size}" if debug
  end

  # The very first chunk never preserves the dictionary; later chunks keep it
  # unless the control byte asks for a dictionary reset.
  preserve_dictionary = chunk_num.zero? ? false : !dictionary_reset

  decompressed = @lzma_decoder.decode_stream(nil,
                                             preserve_dict: preserve_dictionary,
                                             check_rc_finished: false)

  warn "DEBUG: decompress_lzma_chunk - expected=#{lzma_uncompressed_size}, got=#{decompressed.bytesize}" if debug

  if decompressed.bytesize != lzma_uncompressed_size
    # Diagnostics go to stderr and only when debugging; the exception below
    # already carries both sizes.
    warn "DEBUG: Size mismatch - decompressed=#{decompressed.bytesize}, expected=#{lzma_uncompressed_size}" if debug
    raise Omnizip::DecompressionError, "Decompressed size mismatch: expected #{lzma_uncompressed_size}, " \
                                       "got #{decompressed.bytesize}"
  end

  decompressed
end
|
|
607
|
+
|
|
608
|
+
# Build LZMA header for decompression
|
|
609
|
+
#
|
|
610
|
+
# @param uncompressed_size [Integer] Expected size after decompression
|
|
611
|
+
# @param properties [Integer, nil] LZMA properties byte (lc/lp/pb encoding) from LZMA2 chunk
|
|
612
|
+
# @return [String] LZMA header (13 bytes)
|
|
613
|
+
def build_lzma_header(uncompressed_size, properties = nil)
  # Assembles a 13-byte LZMA header: one lc/lp/pb byte, @dict_size as a
  # 32-bit little-endian value, then the uncompressed size as a 64-bit
  # little-endian value.
  tracing = ENV["LZMA2_DEBUG"]

  if properties && properties >= 0
    # properties = (pb * 9 * 5) + (lp * 9) + lc
    pb, rest = properties.divmod(9 * 5)
    lp, lc = rest.divmod(9)

    if tracing
      recombined = lc + (lp * 9) + (pb * 9 * 5)
      warn "DEBUG: build_lzma_header - properties=0x#{properties.to_s(16)} -> lc=#{lc}, lp=#{lp}, pb=#{pb}, props=0x#{recombined.to_s(16)}"
    end
  else
    # No usable properties byte: every field falls back to zero.
    lc = lp = pb = 0
    warn "DEBUG: build_lzma_header - no properties, using defaults lc=0, lp=0, pb=0" if tracing
  end

  props_byte = lc + (lp * 9) + (pb * 9 * 5)
  [props_byte, @dict_size, uncompressed_size].pack("CVQ<")
end
|
|
658
|
+
|
|
659
|
+
# Read size bytes in big-endian order
|
|
660
|
+
#
|
|
661
|
+
# @param num_bytes [Integer] Number of bytes to read
|
|
662
|
+
# @return [Integer] Size value
|
|
663
|
+
def read_size_bytes(num_bytes)
  # Folds the next num_bytes bytes of @input into one Integer, most
  # significant byte first. Running out of input raises Omnizip::IOError.
  num_bytes.times.reduce(0) do |value, _index|
    octet = @input.getbyte
    raise Omnizip::IOError, "Unexpected end of stream" if octet.nil?

    (value << 8) | octet
  end
end
|
|
673
|
+
|
|
674
|
+
# Ensure LZMA decoder exists
|
|
675
|
+
# Creates a decoder with default properties if one doesn't exist yet
|
|
676
|
+
# This is needed for uncompressed chunks that come before the first compressed chunk
|
|
677
|
+
def ensure_lzma_decoder_exists
  # Creates @lzma_decoder with lc=3, lp=0, pb=2 when none exists yet and
  # seeds its dictionary, rep distances and state machine by hand, because
  # the decoder's own decode_stream is not run here.
  return if @lzma_decoder

  tracing = ENV["LZMA2_DEBUG"]
  warn "DEBUG: ensure_lzma_decoder_exists - Creating LZMA decoder for uncompressed chunk" if tracing

  decoder = Omnizip::Algorithms::XzUtilsDecoder.new(
    StringIO.new(""), # nothing to decode yet
    lzma2_mode: true,
    lc: 3,
    lp: 0,
    pb: 2,
    dict_size: @dict_size,
    uncompressed_size: 0xFFFFFFFFFFFFFFFF # size not known
  )
  @lzma_decoder = decoder

  # The dictionary buffer holds @dict_size bytes after the initial position.
  dict_buf_size = @dict_size + Omnizip::Algorithms::LZMA::XzUtilsDecoder::LZ_DICT_INIT_POS

  decoder.instance_variable_set(:@dict_buf, Array.new(dict_buf_size, 0))
  decoder.instance_variable_set(:@pos, Omnizip::Algorithms::LZMA::XzUtilsDecoder::LZ_DICT_INIT_POS)
  decoder.instance_variable_set(:@dict_full, 0)
  decoder.instance_variable_set(:@has_wrapped, false)

  # All four rep distances start at zero.
  %i[@rep0 @rep1 @rep2 @rep3].each do |rep|
    decoder.instance_variable_set(rep, 0)
  end

  decoder.instance_variable_set(:@state, Omnizip::Algorithms::LZMA::SdkStateMachine.new)

  if tracing
    warn "DEBUG: ensure_lzma_decoder_exists - Created LZMA decoder with lc=3, lp=0, pb=2, dict_size=#{@dict_size}"
    warn "DEBUG: ensure_lzma_decoder_exists - Initialized dict_buf_size=#{dict_buf_size}, pos=#{Omnizip::Algorithms::LZMA::XzUtilsDecoder::LZ_DICT_INIT_POS}"
  end
end
|
|
719
|
+
end
|
|
720
|
+
end
|
|
721
|
+
end
|
|
722
|
+
end
|
|
723
|
+
end
|