omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,754 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require "stringio"
|
|
24
|
+
require_relative "constants"
|
|
25
|
+
require_relative "block_header_parser"
|
|
26
|
+
require_relative "../../checksums/verifier"
|
|
27
|
+
# Load BCJ filters for filter chain decoding
|
|
28
|
+
require_relative "../../filters/bcj_x86"
|
|
29
|
+
require_relative "../../filters/bcj_arm"
|
|
30
|
+
require_relative "../../filters/bcj_ppc"
|
|
31
|
+
require_relative "../../filters/bcj_ia64"
|
|
32
|
+
require_relative "../../filters/bcj_sparc"
|
|
33
|
+
require_relative "../../filters/delta"
|
|
34
|
+
# LZMA2::Decoder is loaded by the main omnizip library via lzma2.rb
|
|
35
|
+
|
|
36
|
+
module Omnizip
|
|
37
|
+
module Formats
|
|
38
|
+
module XzFormat
|
|
39
|
+
# XZ Block decoder
|
|
40
|
+
#
|
|
41
|
+
# Decodes a single XZ block which consists of:
|
|
42
|
+
# - Block Header
|
|
43
|
+
# - Compressed Data
|
|
44
|
+
# - Block Padding (to 4-byte boundary)
|
|
45
|
+
# - Check (CRC32/CRC64/SHA256)
|
|
46
|
+
#
|
|
47
|
+
# Reference: XZ Utils source tree, src/liblzma/common/block_decoder.c
|
|
48
|
+
class BlockDecoder
|
|
49
|
+
# Filter IDs
|
|
50
|
+
FILTER_LZMA2 = 0x21
|
|
51
|
+
|
|
52
|
+
# Accessor for new input after block (used by stream decoder for multi-block files)
|
|
53
|
+
attr_reader :new_input_after_block
|
|
54
|
+
# Accessor for block size information (used for index validation)
|
|
55
|
+
attr_reader :unpadded_size, :uncompressed_size
|
|
56
|
+
|
|
57
|
+
# Wrapper for counting bytes read from a stream
|
|
58
|
+
class CountingInputStream
  # @return [Integer] running total of bytes returned by #read and #getbyte
  attr_reader :bytes_read

  # @param stream [#read, #getbyte] underlying stream that all calls are
  #   delegated to
  def initialize(stream)
    @stream = stream
    @bytes_read = 0
  end

  # Read from the underlying stream and add the size of whatever came
  # back to the running total.
  #
  # @param length [Integer, nil] forwarded unchanged to the stream's #read
  # @param outbuf [String, nil] forwarded unchanged to the stream's #read
  # @return [String, nil] exactly what the underlying stream returned
  def read(length = nil, outbuf = nil)
    chunk = @stream.read(length, outbuf)
    # A nil result means nothing was read, so the counter is left alone.
    @bytes_read += chunk.bytesize if chunk
    chunk
  end

  # Read a single byte from the underlying stream.
  #
  # @return [Integer, nil] the byte, or nil when the stream returned none
  def getbyte
    byte = @stream.getbyte
    @bytes_read += 1 if byte
    byte
  end

  # Report whether the underlying stream is exhausted.
  #
  # IO and StringIO implement #eof? but not #eos?, so delegating blindly
  # to #eos? raised NoMethodError for those streams. Streams that do
  # provide #eos? are still asked through it first.
  #
  # @return [Boolean] true when the underlying stream reports its end
  def eos?
    return @stream.eos? if @stream.respond_to?(:eos?)

    @stream.eof?
  end

  # Forward an encoding change when the underlying stream supports it;
  # otherwise this is a no-op that returns nil.
  #
  # @param enc [Encoding, String] encoding passed through unchanged
  def set_encoding(enc)
    @stream.set_encoding(enc) if @stream.respond_to?(:set_encoding)
  end
end
|
|
89
|
+
|
|
90
|
+
# Initialize block decoder
|
|
91
|
+
#
|
|
92
|
+
# @param input [IO] Input stream positioned at block header
|
|
93
|
+
# @param check_type [Integer] Check type (0=None, 1=CRC32, 4=CRC64, 10=SHA256)
|
|
94
|
+
def initialize(input, check_type)
  # Caller-supplied collaborators
  @check_type = check_type
  @input = input

  # Size bookkeeping exposed through the readers (for index validation);
  # both start out unset.
  @unpadded_size = @uncompressed_size = nil

  # Replacement input handed to the stream decoder; none yet.
  @new_input_after_block = nil

  # Flag recording whether LZMA2 has already decoded the data.
  @data_already_decompressed = false
end
|
|
102
|
+
|
|
103
|
+
# Decode block
|
|
104
|
+
#
|
|
105
|
+
# @return [String] Decompressed block data (after all filters are undone).
|
|
106
|
+
#   Block sizes are recorded as a side effect and can be read afterwards
|
|
107
|
+
#   via #unpadded_size and #uncompressed_size.
|
|
108
|
+
# @raise [RuntimeError] If block is invalid or checksum mismatch
|
|
109
|
+
def decode
  # Decodes the XZ block that starts at the current position of @input and
  # returns its uncompressed payload as a String.
  #
  # Two layouts are handled:
  # * Compressed Size present in the header: the compressed data, padding
  #   and check are read straight from @input, which is left positioned
  #   right after the block.
  # * Compressed Size absent: the rest of @input is read in one go, LZMA2
  #   is decoded while counting consumed bytes, and the unread tail is
  #   exposed through #new_input_after_block.
  #
  # Side effects: sets @unpadded_size and @uncompressed_size.
  # Raises Omnizip::IOError / Omnizip::FormatError on truncated or malformed
  # input, Omnizip::DecompressionError on an uncompressed-size mismatch and
  # Omnizip::ChecksumError when the block check does not match.
  header = BlockHeaderParser.parse(@input)

  compressed_size = header[:compressed_size]
  check_size = Checksums::Verifier.check_size(@check_type)

  if ENV["XZ_BLOCK_DEBUG"]
    warn "DEBUG: decode - compressed_size=#{compressed_size.inspect}, check_type=#{@check_type}"
    warn "DEBUG: @input.class=#{@input.class}, @input.respond_to?(:pos)=#{@input.respond_to?(:pos)}"
    pos = @input.respond_to?(:pos) ? @input.pos : "N/A"
    warn "DEBUG: @input.pos=#{pos}"
  end

  if compressed_size.nil?
    # No Compressed Size in the header: the block boundary is only known
    # once LZMA2 has run, so read everything and let the decoder report how
    # many bytes it consumed.
    all_remaining = @input.read

    uncompressed_data, consumed_bytes = decode_lzma2_with_consumption_tracking(
      all_remaining: all_remaining,
      filters: header[:filters],
    )

    # LZMA2 has already been applied; only non-LZMA2 filters remain below.
    @data_already_decompressed = true

    # Block layout: [compressed data] [padding to 4-byte boundary] [check]
    padding_needed = (4 - (consumed_bytes % 4)) % 4
    check_start_pos = consumed_bytes + padding_needed

    # Padding must be present and all zero
    # (xz: src/liblzma/common/block_decoder.c).
    if padding_needed.positive?
      padding_bytes = all_remaining.byteslice(consumed_bytes, padding_needed)
      if padding_bytes.nil? || padding_bytes.bytesize < padding_needed
        raise Omnizip::FormatError,
              "Unexpected end of stream in block padding"
      end
      unless padding_bytes.bytes.all?(0)
        raise Omnizip::FormatError,
              "Block padding contains non-zero bytes"
      end
    end

    if ENV["XZ_BLOCK_DEBUG"]
      warn "DEBUG: consumed_bytes=#{consumed_bytes}, padding_needed=#{padding_needed}, check_start_pos=#{check_start_pos}"
      warn "DEBUG: all_remaining.bytesize=#{all_remaining.bytesize}"
    end

    if check_start_pos + check_size > all_remaining.bytesize
      raise Omnizip::FormatError,
            "Invalid check position"
    end

    check_bytes = all_remaining.byteslice(check_start_pos, check_size)

    # Everything after the check belongs to the next block / index.
    # total_block_size is a BYTE offset, so the tail must be cut with
    # byteslice; character indexing would only be right for a string that
    # happens to be tagged as binary.
    total_block_size = check_start_pos + check_size
    data_after_block = all_remaining.byteslice(
      total_block_size, all_remaining.bytesize - total_block_size
    )

    new_input = StringIO.new(data_after_block)
    new_input.set_encoding(Encoding::BINARY)

    # The stream decoder picks this up to continue after the block.
    @new_input_after_block = new_input
  else
    compressed_data = @input.read(compressed_size)
    if compressed_data.nil? || compressed_data.bytesize < compressed_size
      raise Omnizip::IOError,
            "Unexpected end of stream in compressed data: expected #{compressed_size} bytes"
    end

    # The block header is always 4-byte aligned, so only the compressed
    # data needs padding up to the next 4-byte boundary.
    padding_needed = (4 - (compressed_size % 4)) % 4
    if padding_needed.positive?
      padding = @input.read(padding_needed)
      if padding.nil? || padding.bytesize < padding_needed
        raise Omnizip::IOError,
              "Unexpected end of stream in block padding"
      end
      unless padding.bytes.all?(0)
        raise Omnizip::FormatError,
              "Block padding contains non-zero bytes"
      end
    end

    if check_size.positive?
      check_bytes = @input.read(check_size)
      if check_bytes.nil? || check_bytes.bytesize < check_size
        raise Omnizip::IOError,
              "Unexpected end of stream in block check"
      end
    else
      check_bytes = ""
    end

    # @input is now positioned at whatever follows this block, so no
    # replacement input is needed.
  end

  if @data_already_decompressed
    # LZMA2 ran during boundary detection; undo any other filters of the
    # chain in reverse (decoding) order.
    remaining_filters = header[:filters].reject { |f| f[:id] == FILTER_LZMA2 }
    uncompressed_data = remaining_filters.reverse_each.inject(@decompressed_data) do |data, filter|
      decode_single_filter(data, filter)
    end
  else
    uncompressed_data = decode_filters(compressed_data,
                                       header[:filters])
  end

  # Verify uncompressed size matches header (if present)
  if header[:uncompressed_size] && (uncompressed_data.bytesize != header[:uncompressed_size])
    raise Omnizip::DecompressionError,
          "Uncompressed size mismatch: header says #{header[:uncompressed_size]}, got #{uncompressed_data.bytesize}"
  end

  if ENV["DEBUG_CHECKSUM"]
    puts "DEBUG: uncompressed_data.bytesize=#{uncompressed_data.bytesize}"
    puts "DEBUG: first 100 bytes: #{uncompressed_data[0, 100].inspect}"
    # Payloads shorter than 50 bytes are shown whole instead of as nil.
    puts "DEBUG: last 50 bytes: #{(uncompressed_data[-50..] || uncompressed_data).inspect}"
  end

  unless Checksums::Verifier.verify(uncompressed_data, check_bytes,
                                    @check_type)
    raise Omnizip::ChecksumError,
          "Block checksum mismatch for check type #{@check_type}"
  end

  # Sizes recorded for index validation. Per the XZ file format, Unpadded
  # Size covers Block Header + Compressed Data + Check and excludes Block
  # Padding.
  @uncompressed_size = uncompressed_data.bytesize

  header_size = header[:header_size] || 0
  actual_compressed_size = compressed_size.nil? ? consumed_bytes : compressed_size
  @unpadded_size = header_size + actual_compressed_size + check_size

  uncompressed_data
end
|
|
290
|
+
|
|
291
|
+
private
|
|
292
|
+
|
|
293
|
+
# Decode filter chain
|
|
294
|
+
#
|
|
295
|
+
# @param compressed_data [String] Compressed data
|
|
296
|
+
# @param filters [Array<Hash>] Filter definitions
|
|
297
|
+
# @return [String] Decompressed data
|
|
298
|
+
# @raise [RuntimeError] If filter chain is unsupported
|
|
299
|
+
def decode_filters(compressed_data, filters)
  # The chain is stored in encoding order, so decoding walks it backwards,
  # feeding each stage's output into the next one. An empty chain hands the
  # input back untouched; a single-entry chain is one decode_single_filter
  # call.
  filters.reverse_each.inject(compressed_data) do |data, filter|
    decode_single_filter(data, filter)
  end
end
|
|
317
|
+
|
|
318
|
+
# Decode a single filter
|
|
319
|
+
#
|
|
320
|
+
# @param compressed_data [String] Compressed data
|
|
321
|
+
# @param filter [Hash] Filter definition with :id and :properties
|
|
322
|
+
# @return [String] Decompressed data
|
|
323
|
+
def decode_single_filter(compressed_data, filter)
  # Routes one filter-chain entry to its decoder based on the filter id.
  filter_id = filter[:id]
  props = filter[:properties]

  return decode_lzma2(compressed_data, props) if FILTER_LZMA2 == filter_id
  return decode_delta(compressed_data, props) if 0x03 == filter_id # Delta
  return decode_bcj_arm64(compressed_data, props) if 0x0A == filter_id # ARM64 BCJ

  # Remaining BCJ filter ids share one decoder and differ only in the
  # architecture symbol handed to it.
  bcj_architectures = [
    [0x04, :x86],
    [0x05, :powerpc],
    [0x06, :ia64],
    [0x07, :arm],
    [0x08, :armthumb],
    [0x09, :sparc],
  ]
  _, architecture = bcj_architectures.find { |id, _| id == filter_id }
  return decode_bcj(compressed_data, architecture, props) if architecture

  raise Omnizip::FormatError,
        "Unsupported filter ID: 0x#{filter[:id].to_s(16).upcase}"
end
|
|
348
|
+
|
|
349
|
+
# Decode Delta filter
|
|
350
|
+
#
|
|
351
|
+
# @param data [String] Input data
|
|
352
|
+
# @param properties [String, nil] Filter properties (first byte is distance - 1)
|
|
353
|
+
# @return [String] Delta-transformed data
|
|
354
|
+
def decode_delta(data, properties)
  # The first property byte stores (distance - 1); nil or empty properties
  # therefore mean a distance of 1, and 0xFF means 256.
  stored = properties&.getbyte(0) || 0

  Omnizip::Filters::Delta.new(stored + 1).decode(data, 0)
end
|
|
365
|
+
|
|
366
|
+
# Decode BCJ filter
|
|
367
|
+
#
|
|
368
|
+
# @param data [String] Input data
|
|
369
|
+
# @param architecture [Symbol] Target architecture
|
|
370
|
+
# @param properties [String, nil] Filter properties
|
|
371
|
+
# @return [String] BCJ-transformed data
|
|
372
|
+
def decode_bcj(data, architecture, properties)
  # Applies the BCJ decoder for +architecture+ to +data+.
  #
  # BCJ filter properties are either empty (start offset 0) or 4 bytes
  # holding the start offset as an unsigned 32-bit LITTLE-endian integer
  # (XZ file format, "Branch/Call/Jump Filters for Executables"; liblzma
  # reads it with read32le). Reading it big-endian produced a wrong offset
  # for every non-zero value and disagreed with decode_bcj_arm64.
  start_offset =
    if properties && properties.bytesize >= 4
      properties.unpack1("V")
    else
      0
    end

  # Every supported architecture goes through the same filter class; the
  # symbol is passed through unchanged.
  supported = %i[x86 powerpc ia64 arm armthumb sparc arm64]
  unless supported.include?(architecture)
    raise Omnizip::FormatError,
          "Unsupported BCJ architecture: #{architecture}"
  end

  Omnizip::Filters::BCJ.new(architecture: architecture).decode(data, start_offset)
end
|
|
404
|
+
|
|
405
|
+
# Decode ARM64 BCJ filter
|
|
406
|
+
#
|
|
407
|
+
# XZ Utils pattern (simple/arm64.c):
|
|
408
|
+
# - Converts BL instructions (bits 26-31 == 0x25) with +/-128 MiB range
|
|
409
|
+
# - Converts ADRP instructions (bits 25-29 == 0x10000) with +/-512 MiB range
|
|
410
|
+
# - Uses start_offset for position calculation
|
|
411
|
+
#
|
|
412
|
+
# @param data [String] Input data
|
|
413
|
+
# @param properties [String, nil] Filter properties (first 4 bytes are start_offset)
|
|
414
|
+
# @return [String] ARM64 BCJ-transformed data
|
|
415
|
+
def decode_bcj_arm64(data, properties)
  # Optional filter property: 4-byte start offset, least-significant byte
  # first. Anything shorter (or nil) means offset 0.
  start_offset = (properties && properties.bytesize >= 4) ? properties.unpack1("V") : 0

  if ENV["DEBUG_ARM64_BCJ"]
    puts "DEBUG ARM64 BCJ: start_offset=0x#{start_offset.to_s(16).upcase}"
    puts "DEBUG ARM64 BCJ: input (first 32 bytes):"
    puts data[0, 32].unpack1("H*").scan(/../).each_slice(16).map { |row| row.join(" ") }.join("\n")
  end

  # Work on a binary copy. Only whole 32-bit little-endian words are
  # inspected; up to three trailing bytes are carried over untouched.
  output = data.b

  output.unpack("V*").each_with_index do |instr, index|
    pc = (start_offset + (index * 4)) & 0xFFFFFFFF

    if (instr >> 26) == 0x25
      # BL: the decoder subtracts pc/4 from the stored 26-bit immediate.
      rewritten = 0x94000000 | ((instr - (pc >> 2)) & 0x03FFFFFF)
    elsif (instr & 0x9F000000) == 0x90000000
      # ADRP: gather the immediate from its two bit fields.
      src = ((instr >> 29) & 3) | ((instr >> 3) & 0x001FFFFC)

      # Leave the instruction alone when the range test fails
      # (xz arm64.c: if ((src + 0x00020000) & 0x001C0000) continue;).
      next if (src + 0x00020000).anybits?(0x001C0000)

      # The decoder subtracts pc/4096, then scatters the result back.
      dest = (src - (pc >> 12)) & 0xFFFFFFFF
      rewritten = (instr & 0x9000001F) |
                  ((dest & 3) << 29) |
                  ((dest & 0x0003FFFC) << 3) |
                  ((0 - (dest & 0x00020000)) & 0x00E00000)
    else
      next
    end

    output[index * 4, 4] = [rewritten].pack("V")
  end

  if ENV["DEBUG_ARM64_BCJ"]
    puts "DEBUG ARM64 BCJ: output (first 32 bytes):"
    puts output[0, 32].unpack1("H*").scan(/../).each_slice(16).map { |row| row.join(" ") }.join("\n")
  end

  output
end
|
|
488
|
+
|
|
489
|
+
# Read an unsigned 32-bit little-endian integer from data
|
|
490
|
+
#
|
|
491
|
+
# @param data [String] Binary data
|
|
492
|
+
# @param offset [Integer] Starting position
|
|
493
|
+
# @return [Integer] Unsigned 32-bit integer
|
|
494
|
+
def read_uint32_le(data, offset)
  # Byte k of the four-byte window contributes bits 8k..8k+7 of the result.
  octets = data.byteslice(offset, 4).bytes
  (0..3).reduce(0) { |word, k| word | (octets[k] << (8 * k)) }
end
|
|
501
|
+
|
|
502
|
+
# Write an unsigned 32-bit little-endian integer to data
|
|
503
|
+
#
|
|
504
|
+
# @param data [String] Binary data (modified in place)
|
|
505
|
+
# @param offset [Integer] Starting position
|
|
506
|
+
# @param value [Integer] 32-bit integer to write
|
|
507
|
+
# @return [void]
|
|
508
|
+
def write_uint32_le(data, offset, value)
  # Keep only the low 32 bits, then store them least-significant byte first.
  word = value & 0xFFFFFFFF
  (0..3).map { |k| data.setbyte(offset + k, (word >> (8 * k)) & 0xFF) }.last
end
|
|
515
|
+
|
|
516
|
+
# Decode LZMA2 data with byte consumption tracking
|
|
517
|
+
#
|
|
518
|
+
# This method is used when compressed_size is not specified in the block header.
|
|
519
|
+
# It uses a CountingInputStream to track how many bytes the LZMA2 decoder consumes.
|
|
520
|
+
#
|
|
521
|
+
# @param all_remaining [String] All remaining data after block header
|
|
522
|
+
# @param filters [Array<Hash>] Filter definitions
|
|
523
|
+
# @return [Array<String, Integer>] Decompressed data and bytes consumed
|
|
524
|
+
def decode_lzma2_with_consumption_tracking(all_remaining:, filters:)
  # Decodes the LZMA2 stream at the start of +all_remaining+ and reports how
  # many input bytes the decoder pulled, so the caller can locate the block
  # padding and check that follow. The decoded data is also stored in
  # @decompressed_data for later filter-chain processing.
  #
  # Raises Omnizip::FormatError when the chain has no LZMA2 filter or when
  # the LZMA2 dictionary-size property byte is out of range.
  if ENV["DEBUG_LZMA2_INPUT"]
    puts "DEBUG LZMA2 INPUT: first 30 bytes:"
    all_remaining.bytes[0, 30].each_with_index do |byte, i|
      printf " [%2d] 0x%02x (%3d)", i, byte, byte
      puts "" if ((i + 1) % 4).zero?
    end
    puts ""
  end

  input_buffer = CountingInputStream.new(StringIO.new(all_remaining))
  input_buffer.set_encoding(Encoding::BINARY)

  # The chain is in encoding order, so LZMA2 is not necessarily filters[0].
  lzma2_filter = filters.find { |f| f[:id] == FILTER_LZMA2 }
  if lzma2_filter.nil?
    raise Omnizip::FormatError,
          "Unsupported filter chain: LZMA2 filter not found (not supported)"
  end

  # LZMA2 filter property byte -> dictionary size (XZ file format, LZMA2):
  #   0..39 : (2 | (bits & 1)) << (bits / 2 + 11)
  #   40    : 4 GiB - 1
  #   > 40  : invalid (previously yielded absurdly large sizes)
  properties = lzma2_filter[:properties]
  dict_size =
    if properties&.bytesize&.positive?
      prop = properties.getbyte(0)
      if prop > 40
        raise Omnizip::FormatError,
              "Invalid LZMA2 dictionary size property: 0x#{prop.to_s(16).upcase}"
      elsif prop == 40
        0xFFFFFFFF
      else
        (2 | (prop & 1)) << ((prop / 2) + 11)
      end
    else
      8 * 1024 * 1024 # 8MB default when no property byte is available
    end

  # raw_mode: the XZ payload starts with a chunk control byte, not with a
  # property byte, so the dictionary size is injected directly.
  decoder = Omnizip::Implementations::XZUtils::LZMA2::Decoder.new(input_buffer,
                                                                  raw_mode: true)
  decoder.instance_variable_set(:@dict_size, dict_size)
  decoder.instance_variable_set(:@properties, Omnizip::Algorithms::LZMA2::Properties.new(dict_size))

  uncompressed_data = decoder.decode_stream

  # Kept for the caller's remaining-filter pass.
  @decompressed_data = uncompressed_data

  [uncompressed_data, input_buffer.bytes_read]
end
|
|
576
|
+
|
|
577
|
+
# Decode LZMA2 data
|
|
578
|
+
#
|
|
579
|
+
# @param compressed_data [String] LZMA2 compressed data
|
|
580
|
+
# @param properties [String, nil] LZMA2 properties byte
|
|
581
|
+
# @return [String] Decompressed data
|
|
582
|
+
def decode_lzma2(compressed_data, properties)
  # Decodes a complete LZMA2 stream held in +compressed_data+.
  #
  # In an XZ block the LZMA2 payload starts directly with a chunk control
  # byte; the dictionary size comes from the filter's property byte in the
  # block header instead. Raises Omnizip::FormatError when that byte is out
  # of range.
  input_buffer = StringIO.new(compressed_data)
  input_buffer.set_encoding(Encoding::BINARY)

  # LZMA2 filter property byte -> dictionary size (XZ file format, LZMA2):
  #   0..39 : (2 | (bits & 1)) << (bits / 2 + 11)
  #           e.g. 0x08 -> 2 << 15 = 64 KiB, 0x09 -> 3 << 15 = 96 KiB
  #   40    : 4 GiB - 1
  #   > 40  : invalid (previously yielded absurdly large sizes)
  dict_size =
    if properties&.bytesize&.positive?
      prop = properties.getbyte(0)
      if prop > 40
        raise Omnizip::FormatError,
              "Invalid LZMA2 dictionary size property: 0x#{prop.to_s(16).upcase}"
      elsif prop == 40
        0xFFFFFFFF
      else
        (2 | (prop & 1)) << ((prop / 2) + 11)
      end
    else
      8 * 1024 * 1024 # 8MB default when no property byte is available
    end

  # raw_mode: the decoder must not expect a leading property byte, so the
  # dictionary size is injected directly.
  decoder = Omnizip::Implementations::XZUtils::LZMA2::Decoder.new(input_buffer,
                                                                  raw_mode: true)
  decoder.instance_variable_set(:@dict_size, dict_size)
  decoder.instance_variable_set(:@properties, Omnizip::Algorithms::LZMA2::Properties.new(dict_size))

  decoder.decode_stream
end
|
|
625
|
+
|
|
626
|
+
# Find the end of LZMA2 compressed data by parsing chunks
|
|
627
|
+
#
|
|
628
|
+
# LZMA2 chunk format:
|
|
629
|
+
# - Control byte (1 byte)
|
|
630
|
+
# - 0x00: End of stream marker (STOP)
|
|
631
|
+
# - 0x01-0x02: Uncompressed chunk
|
|
632
|
+
# - Size (2 bytes, big-endian) + 1
|
|
633
|
+
# - Uncompressed data
|
|
634
|
+
# - 0x03-0x7F: Compressed chunk (LZMA)
|
|
635
|
+
# - Properties (1 byte)
|
|
636
|
+
# - Compressed LZMA data
|
|
637
|
+
# - 0x80-0xFF: Compressed chunk (LZMA)
|
|
638
|
+
# - Uncompressed size (2 bytes, big-endian, high 5 bits in control)
|
|
639
|
+
# - Compressed size (2 bytes, big-endian) + 1
|
|
640
|
+
# - Properties (1 byte, if control >= 0xC0)
|
|
641
|
+
# - Compressed LZMA data
|
|
642
|
+
#
|
|
643
|
+
# @param data [String] LZMA2 data to parse
|
|
644
|
+
# @return [Integer] Position where compressed data ends (before check bytes)
|
|
645
|
+
def find_lzma2_compressed_data_end(data)
  # Walks +data+ chunk by chunk using the LZMA2 control bytes and returns a
  # byte offset describing where the LZMA2 payload ends. Nothing is
  # decompressed; chunk bodies are skipped based on their size fields only.
  #
  # NOTE(review): no caller is visible in this part of the file - confirm
  # whether this helper is still used before relying on it.
  # NOTE(review): the returned offset is not uniform. The top-level 0x00
  # case returns the position AFTER the end marker, while the scan inside
  # the 0x03..0x7F case returns the position OF the candidate byte.
  pos = 0

  while pos < data.bytesize
    control = data.getbyte(pos)
    pos += 1

    case control
    when 0x00
      # End of stream marker - LZMA2 data ends here.
      # pos already points past the marker because it was advanced above.
      return pos
    when 0x01, 0x02
      # Uncompressed chunk: 2-byte big-endian (size - 1) followed by the data.
      # A missing size byte is treated as 0 instead of raising.
      size_bytes = data.getbyte(pos) || 0
      pos += 1
      size_bytes = (size_bytes << 8) | (data.getbyte(pos) || 0)
      pos += 1
      uncompressed_size = size_bytes + 1

      # Skip uncompressed data (pos may move past the end of data; the
      # while condition then ends the walk).
      pos += uncompressed_size
    when 0x03..0x7F
      # Treated here as an LZMA chunk without size fields.
      # NOTE(review): the LZMA2 format as implemented by xz appears to treat
      # control bytes 0x03..0x7F as invalid - confirm against the spec.
      # Skip properties byte
      pos += 1

      # There is no size field to skip by, so the code below guesses the
      # boundary by scanning forward for a byte that looks like the start of
      # another chunk. This is a heuristic: payload bytes can be mistaken
      # for chunk markers.
      #
      # found_next_chunk is never set to true, so the scan loop ends only
      # through one of the returns or by running out of data.
      found_next_chunk = false
      scan_pos = pos

      while scan_pos < data.bytesize && !found_next_chunk
        next_byte = data.getbyte(scan_pos)

        # Check if this could be a chunk start
        case next_byte
        when 0x00
          # Looks like an end marker; the offset OF the marker is returned.
          return scan_pos
        when 0x01, 0x02
          # Candidate uncompressed chunk - needs at least one following byte.
          next_next_byte = data.getbyte(scan_pos + 1)
          if next_next_byte
            # next_byte is 0x01 or 0x02 here, so next_byte >> 5 is always 0
            # and the candidate size is just the following byte (0..255).
            size_hi = (next_byte >> 5)
            size_lo = next_next_byte
            uncompressed_size = (size_hi << 8) | size_lo

            # Accept when the size is small and fits in the remaining data.
            if uncompressed_size <= 1024 && scan_pos + 1 + uncompressed_size <= data.bytesize
              # Plausible uncompressed chunk found
              return scan_pos
            end
          end
          scan_pos += 1
        when 0x03..0x7F
          # Accepted as the next chunk whenever at least one more byte
          # follows it.
          if scan_pos + 1 < data.bytesize
            # Could be valid - assume this is the next chunk
            return scan_pos
          end

          scan_pos += 1
        else
          scan_pos += 1
        end
      end

      # No candidate boundary found: report the position right after the
      # properties byte.
      return pos
    when 0x80..0xFF
      # LZMA chunk with explicit sizes.
      # Skip the 2 uncompressed-size bytes (their value is not needed here).
      pos += 2

      # Compressed size: 2 bytes big-endian, stored as (size - 1).
      # Missing bytes are treated as 0 instead of raising.
      compressed_size_hi = data.getbyte(pos) || 0
      pos += 1
      compressed_size_lo = data.getbyte(pos) || 0
      pos += 1
      compressed_size = (compressed_size_hi << 8) | compressed_size_lo
      compressed_size += 1

      # Properties byte (if control >= 0xC0)
      pos += 1 if control >= 0xC0

      # Skip compressed LZMA data
      pos += compressed_size
    else
      # The branches above cover every value 0x00..0xFF and getbyte cannot
      # return nil while pos < data.bytesize, so this is a defensive default.
      raise Omnizip::FormatError,
            "Invalid LZMA2 control byte: 0x#{control.to_s(16).upcase}"
    end
  end

  # The walk ran off the end of data without seeing an end marker.
  pos
end
|
|
751
|
+
end
|
|
752
|
+
end
|
|
753
|
+
end
|
|
754
|
+
end
|