omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require "stringio"
|
|
24
|
+
require_relative "constants"
|
|
25
|
+
require_relative "stream_header_parser"
|
|
26
|
+
require_relative "stream_footer_parser"
|
|
27
|
+
require_relative "block_decoder"
|
|
28
|
+
require_relative "index_decoder"
|
|
29
|
+
require_relative "../../error"
|
|
30
|
+
|
|
31
|
+
module Omnizip
|
|
32
|
+
module Formats
|
|
33
|
+
module XzFormat
|
|
34
|
+
# XZ Stream decoder
|
|
35
|
+
#
|
|
36
|
+
# Decodes a complete XZ stream which consists of:
|
|
37
|
+
# - Stream Header (12 bytes)
|
|
38
|
+
# - Blocks (one or more)
|
|
39
|
+
# - Index
|
|
40
|
+
# - Stream Footer (12 bytes)
|
|
41
|
+
#
|
|
42
|
+
# Reference: XZ Utils, src/liblzma/common/stream_decoder.c
|
|
43
|
+
class StreamDecoder
|
|
44
|
+
# Decode XZ stream from input
|
|
45
|
+
#
|
|
46
|
+
# @param input [IO] Input stream (file, StringIO, etc.)
|
|
47
|
+
# @return [String] Decompressed data
|
|
48
|
+
# @raise [FormatError] If the stream is truncated or otherwise invalid
|
|
49
|
+
def self.decode(input)
  # The stream header yields the check type that is passed along to every
  # block decode and to the footer read.
  check_type = StreamHeaderParser.parse(input)[:check_type]

  # Capture the total size up front (when the source exposes one); it is
  # needed later for the backward_size validation.
  total_size = input.respond_to?(:size) ? input.size : nil

  # decode_blocks may hand back a different IO object than it was given, so
  # everything that follows the blocks is read from tail_input.
  chunks, block_count, tail_input, block_sizes = decode_blocks(input, check_type)
  index_size = verify_index(tail_input, block_count, block_sizes)[:index_size]

  # XZ spec: "The value of Backward Size is the size of the Index field...
  # If the stored value does not match the real size of the Index field,
  # the decoder MUST indicate an error."
  # Only attempted when the original input is seekable and reports a
  # positive size.
  # Reference: XZ Utils, src/liblzma/common/stream_decoder.c
  if input.respond_to?(:seek) && total_size&.positive?
    validate_backward_size_from_footer(input, total_size, index_size)
  end

  # Consume the stream footer so the read position ends up past it,
  # then make sure nothing follows the footer.
  read_stream_footer(tail_input, check_type, index_size)
  verify_no_trailing_data(tail_input)

  chunks.join.force_encoding(Encoding::BINARY)
end
|
|
80
|
+
|
|
81
|
+
# Decode all blocks from stream until index marker
|
|
82
|
+
#
|
|
83
|
+
# @param input [IO] Input stream
|
|
84
|
+
# @param check_type [Symbol] Checksum type
|
|
85
|
+
# @return [Array, Integer, IO, Array] Output data array, block count, final input stream, and array of block size info
|
|
86
|
+
def self.decode_blocks(input, check_type)
  chunks = []
  sizes = [] # unpadded/uncompressed sizes, later compared with the index
  decoded = 0
  stream = input

  # Blocks follow each other until the first byte of the next field is the
  # index indicator. Running out of data before that is an error.
  while (lead = stream.getbyte) != XzConst::INDEX_INDICATOR
    raise FormatError, "Unexpected end of stream" if lead.nil?

    data, decoder = decode_block(stream, lead, check_type)
    chunks << data
    decoded += 1

    # Only blocks whose decoder reports both sizes take part in the index
    # validation (per XZ Utils index_hash.c).
    unpadded = decoder.unpadded_size
    uncompressed = unpadded && decoder.uncompressed_size
    if uncompressed
      sizes << {
        unpadded_size: unpadded,
        uncompressed_size: uncompressed,
      }
    end

    # A block decoder may hand back a replacement input (multi-block files
    # without explicit sizes); continue reading from it when it does.
    stream = decoder.new_input_after_block || stream
  end

  # Give the indicator byte back so the index parser sees it.
  restore_byte_for_index(stream, lead)

  [chunks, decoded, stream, sizes]
end
|
|
119
|
+
|
|
120
|
+
# Restore byte to stream for index parser
|
|
121
|
+
#
|
|
122
|
+
# @param input [IO] Input stream
|
|
123
|
+
# @param peek_byte [Integer] Byte to restore
|
|
124
|
+
def self.restore_byte_for_index(input, peek_byte)
  # The peeked byte belongs to the index; push it back before index parsing.
  restore_byte(input, peek_byte)
end
|
|
127
|
+
|
|
128
|
+
# Decode single block from stream
|
|
129
|
+
#
|
|
130
|
+
# @param input [IO] Input stream
|
|
131
|
+
# @param peek_byte [Integer] Peeked byte
|
|
132
|
+
# @param check_type [Symbol] Checksum type
|
|
133
|
+
# @return [Array] Two elements: the decompressed data and the BlockDecoder instance that produced it
|
|
134
|
+
def self.decode_block(input, peek_byte, check_type)
  # The peeked byte is the first byte of the block; hand it back so the
  # block decoder reads the block from its very beginning.
  restore_byte(input, peek_byte)

  block_decoder = BlockDecoder.new(input, check_type)
  [block_decoder.decode, block_decoder]
end
|
|
140
|
+
|
|
141
|
+
# Restore a byte to the input stream
|
|
142
|
+
#
|
|
143
|
+
# @param input [IO] Input stream
|
|
144
|
+
# @param byte [Integer] Byte to restore
|
|
145
|
+
# @raise [FormatError] If IO doesn't support ungetbyte
|
|
146
|
+
def self.restore_byte(input, byte)
  # Peeking is implemented as read-then-unread, so the IO must support
  # pushing a byte back.
  unless input.respond_to?(:ungetbyte)
    raise FormatError,
          "IO object doesn't support ungetbyte - cannot parse stream"
  end

  input.ungetbyte(byte)
end
|
|
152
|
+
|
|
153
|
+
# Parse and verify index matches decoded blocks
|
|
154
|
+
#
|
|
155
|
+
# @param input [IO] Input stream
|
|
156
|
+
# @param block_count [Integer] Number of blocks decoded
|
|
157
|
+
# @param block_sizes [Array<Hash>] Array of {unpadded_size, uncompressed_size} for each block
|
|
158
|
+
# @return [Hash] Index data including index_size for backward_size validation
|
|
159
|
+
# @raise [FormatError] If index doesn't match decoded blocks
|
|
160
|
+
def self.verify_index(input, block_count, block_sizes)
  index = IndexDecoder.parse(input)

  # The index must list exactly as many records as blocks were decoded.
  unless index[:count] == block_count
    raise FormatError,
          "Index count mismatch: index says #{index[:count]}, decoded #{block_count}"
  end

  # Size cross-checks (per XZ Utils index_hash.c) need entries on both
  # sides; they catch index fields whose sizes disagree with the blocks.
  records = index[:records]
  return index unless block_sizes.any? && records.any?

  # Round up to the next multiple of four, as XZ Utils does (vli_ceil4 in
  # src/liblzma/common/index.h) when it sums unpadded sizes.
  round_up4 = ->(size) { (size + 3) & ~3 }

  blocks_unpadded = block_sizes.sum { |blk| round_up4.call(blk[:unpadded_size]) }
  blocks_uncompressed = block_sizes.sum { |blk| blk[:uncompressed_size] }
  index_unpadded = records.sum { |rec| round_up4.call(rec[:unpadded_size]) }
  index_uncompressed = records.sum { |rec| rec[:uncompressed_size] }

  # First compare the totals ...
  totals = [
    ["unpadded", blocks_unpadded, index_unpadded],
    ["uncompressed", blocks_uncompressed, index_uncompressed],
  ]
  totals.each do |label, from_blocks, from_index|
    next if from_blocks == from_index

    raise FormatError,
          "Index #{label} size mismatch: blocks sum to #{from_blocks}, " \
          "index says #{from_index}"
  end

  # ... then each record in order, using the raw (unrounded) sizes.
  fields = [["unpadded", :unpadded_size], ["uncompressed", :uncompressed_size]]
  block_sizes.each_with_index do |block, i|
    record = records[i]
    fields.each do |label, key|
      next if block[key] == record[key]

      raise FormatError,
            "Index record #{i} #{label} size mismatch: block has #{block[key]}, " \
            "index says #{record[key]}"
    end
  end

  index
end
|
|
229
|
+
|
|
230
|
+
# Parse and verify footer if input is seekable
|
|
231
|
+
#
|
|
232
|
+
# @param input [IO] Input stream
|
|
233
|
+
# @param check_type [Symbol] Expected checksum type
|
|
234
|
+
# @param index_size [Integer, nil] Actual index size for backward_size validation
|
|
235
|
+
def self.verify_footer_if_seekable(input, check_type, index_size = nil)
  # Without random access and a known size the footer cannot be located.
  can_locate_footer = input.respond_to?(:seek) && input.respond_to?(:size) && input.size
  return unless can_locate_footer

  # Jump to the 12 bytes at the end of the input, parse them, come back.
  saved_pos = input.pos
  input.seek(-12, ::IO::SEEK_END)
  footer = StreamFooterParser.parse(input)
  input.pos = saved_pos

  # NOTE(review): a check-type mismatch is not reported here - the method
  # just returns, which also skips the backward-size check below. Confirm
  # this is intended (read_stream_footer raises in the same situation).
  return unless footer[:check_type] == check_type
  return unless index_size

  # XZ spec: "If the stored value does not match the real size of the Index
  # field, the decoder MUST indicate an error."
  # Stored value -> real size in bytes: (stored + 1) * 4
  real_backward_size = (footer[:backward_size] + 1) * 4
  return if real_backward_size == index_size

  raise FormatError, "Backward size mismatch: footer indicates #{real_backward_size} bytes, " \
                     "but index is #{index_size} bytes"
end
|
|
258
|
+
|
|
259
|
+
# Read and verify the stream footer from the current position
|
|
260
|
+
#
|
|
261
|
+
# @param input [IO] Input stream positioned at the start of the stream footer
|
|
262
|
+
# @param check_type [Symbol] Expected checksum type
|
|
263
|
+
# @param index_size [Integer, nil] Actual index size for backward_size validation
|
|
264
|
+
# @raise [FormatError] If footer is invalid or doesn't match expected values
|
|
265
|
+
def self.read_stream_footer(input, check_type, index_size = nil)
  footer = StreamFooterParser.parse(input)

  # Header and footer must agree on the check type.
  footer_check = footer[:check_type]
  unless footer_check == check_type
    raise FormatError,
          "Stream footer check type mismatch: expected #{check_type}, got #{footer_check}"
  end

  # Nothing more to verify when the caller did not supply the index size.
  return footer unless index_size

  # XZ spec requirement: the stored Backward Size, converted to bytes as
  # (stored + 1) * 4, must equal the real size of the Index field.
  real_backward_size = (footer[:backward_size] + 1) * 4
  unless real_backward_size == index_size
    raise FormatError, "Backward size mismatch: footer indicates #{real_backward_size} bytes, " \
                       "but index is #{index_size} bytes"
  end

  footer
end
|
|
286
|
+
|
|
287
|
+
# Verify there's no invalid trailing data after the stream footer
|
|
288
|
+
#
|
|
289
|
+
# According to XZ spec, after a stream there can be:
|
|
290
|
+
# 1. Stream padding (null bytes to 4-byte boundary)
|
|
291
|
+
# 2. Another stream (concatenated streams)
|
|
292
|
+
#
|
|
293
|
+
# For bad-0cat-header_magic.xz style files with invalid extra data, we must reject them.
|
|
294
|
+
# XZ Utils rejects these with LZMA_FORMAT_ERROR when the extra data is not valid.
|
|
295
|
+
#
|
|
296
|
+
# @param input [IO] Input stream
|
|
297
|
+
# @raise [FormatError] If there's invalid trailing data
|
|
298
|
+
def self.verify_no_trailing_data(input)
  return unless input.respond_to?(:pos) && input.respond_to?(:getbyte)

  # Count the null bytes (stream padding) that follow the footer.
  zero_count = 0
  while (byte = input.getbyte)
    if byte.zero?
      zero_count += 1
      next
    end

    # First non-zero byte: it must start another stream. Put it back so the
    # header check below sees it.
    input.ungetbyte(byte) if input.respond_to?(:ungetbyte)

    # Stream padding must be a multiple of 4 bytes
    unless (zero_count % 4).zero?
      raise FormatError,
            "Invalid stream padding: not a multiple of 4 bytes"
    end

    verify_or_reject_trailing_stream(input)
    return
  end

  # Reached EOF with nothing but padding (possibly none).
  # XZ spec: "Stream Padding MUST contain only null bytes...the size of
  # Stream Padding MUST be a multiple of four bytes."
  return if (zero_count % 4).zero?

  raise FormatError,
        "Invalid stream padding at EOF: #{zero_count} bytes (not a multiple of 4)"
end
|
|
335
|
+
|
|
336
|
+
# Verify that trailing data (if any) is a valid XZ stream
|
|
337
|
+
#
|
|
338
|
+
# @param input [IO] Input stream positioned at potential next stream
|
|
339
|
+
# @raise [FormatError] If the trailing data is not a valid XZ stream
|
|
340
|
+
def self.verify_or_reject_trailing_stream(input)
  xz_magic = [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00] # XZ magic bytes

  # Peek at up to six bytes - the length of the stream header magic.
  peeked = []
  while peeked.size < 6 && (byte = input.getbyte)
    peeked << byte
  end

  # Give the bytes back (last read first) so the position is unchanged.
  peeked.reverse_each { |b| input.ungetbyte(b) } if input.respond_to?(:ungetbyte)

  # If we couldn't read 6 bytes, it's EOF - that's fine
  return if peeked.size < 6

  # Anything other than the XZ magic is invalid trailing data.
  # When the magic matches, the data is a concatenated stream. It is not
  # decoded here, but it is not rejected either: the XZ spec allows
  # concatenated streams, decoding simply stops after the first one.
  peeked.each_with_index do |actual, i|
    expected = xz_magic[i]
    next if actual == expected

    raise FormatError,
          "Trailing data: invalid stream header (byte #{i}: 0x#{actual.to_s(16)} != 0x#{expected.to_s(16)})"
  end
end
|
|
377
|
+
|
|
378
|
+
# Validate that backward_size in footer points to valid index position
|
|
379
|
+
#
|
|
380
|
+
# This is required by the XZ spec: the backward_size must match the actual
|
|
381
|
+
# index size, and the index must start with the index indicator (0x00).
|
|
382
|
+
# Reference: XZ Utils, src/liblzma/common/stream_decoder.c
|
|
383
|
+
#
|
|
384
|
+
# @param input [IO] Input stream (must be seekable)
|
|
385
|
+
# @param file_size [Integer] Total file size in bytes
|
|
386
|
+
# @param _index_size [Integer] Actual index size in bytes (accepted but currently unused)
|
|
387
|
+
# @raise [FormatError] If backward_size points to invalid position
|
|
388
|
+
def self.validate_backward_size_from_footer(input, file_size,
                                            _index_size)
  # Positional sanity check of the footer's Backward Size: walking back from
  # the footer by the advertised index size must land on the index indicator
  # byte. The check is skipped (returns nil) when the input is not seekable,
  # is too short to hold a footer, or does not end with the footer magic.
  # The read position of +input+ is always restored, also when raising.
  # +_index_size+ is accepted for interface compatibility and is not used.
  return unless input.respond_to?(:seek)
  # The footer is 12 bytes long; seeking 12 bytes back from the end of a
  # shorter input would fail.
  return if file_size.nil? || file_size < 12

  original_pos = input.pos
  begin
    # Footer layout: CRC32 (4) | Backward Size (4) | Stream Flags (2) | Magic (2)
    input.seek(-12, ::IO::SEEK_END)
    footer = input.read(12)
    return if footer.nil? || footer.bytesize < 12

    # Compare bytes with bytes. Comparing the String returned by #read with
    # an Array is never equal, which silently disabled this whole check.
    return unless footer.byteslice(10, 2).bytes == [0x59, 0x5A]

    backward_size = footer.byteslice(4, 4).unpack1("V")

    # Calculate real backward size: (stored + 1) * 4
    real_backward_size = (backward_size + 1) * 4

    # Stream structure: [blocks] [index + indicator] [padding] [footer 12 bytes]
    # Index (including indicator) starts at: file_size - footer_size - real_backward_size
    # (real_backward_size is at least 4, so this is always before the footer.)
    expected_index_start = file_size - 12 - real_backward_size

    if expected_index_start.negative?
      raise FormatError,
            "Invalid backward size: #{backward_size} (#{real_backward_size} bytes) " \
            "would place index at negative position #{expected_index_start}"
    end

    # The byte at the expected index start must be the index indicator (0x00)
    input.seek(expected_index_start, ::IO::SEEK_SET)
    index_indicator = input.getbyte

    if index_indicator.nil?
      raise FormatError,
            "Invalid backward size: expected index indicator (0x00) at position #{expected_index_start}, " \
            "but reached end of file"
    end

    if index_indicator != XzConst::INDEX_INDICATOR
      raise FormatError,
            "Invalid backward size: expected index indicator (0x00) at position #{expected_index_start}, " \
            "but found 0x#{index_indicator.to_s(16).upcase}"
    end

    nil
  ensure
    # Restore original position on every exit path.
    input.seek(original_pos, ::IO::SEEK_SET)
  end
end
|
|
465
|
+
end
|
|
466
|
+
end
|
|
467
|
+
end
|
|
468
|
+
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "stringio"
|
|
4
|
+
require_relative "../xz_impl/constants"
|
|
5
|
+
require_relative "../xz_impl/stream_header"
|
|
6
|
+
require_relative "../xz_impl/stream_footer"
|
|
7
|
+
require_relative "../xz_impl/block_encoder"
|
|
8
|
+
require_relative "../xz_impl/index_encoder"
|
|
9
|
+
require_relative "../../error"
|
|
10
|
+
|
|
11
|
+
module Omnizip
  module Formats
    module XzFormat
      # XZ Stream encoder
      # Orchestrates the complete XZ stream creation:
      # Stream Header, Block(s), Index, Stream Footer.
      # Based on XZ Utils stream_encoder.c
      class StreamEncoder
        include Omnizip::Formats::XzConst

        # @param check_type Check type forwarded to the header, block and
        #   footer encoders (defaults to CHECK_CRC64)
        # @param dict_size [Integer] Dictionary size forwarded to the block
        #   encoder (defaults to 64 MiB)
        def initialize(check_type: CHECK_CRC64, dict_size: 64 * 1024 * 1024)
          @check_type = check_type
          @dict_size = dict_size
          @index = IndexEncoder.new
        end

        # Encode data into XZ format
        # @param input [String, IO] Input data to compress
        # @return [String] XZ-formatted compressed data
        def encode(input)
          # Each call produces an independent stream, so start from an empty
          # index. Reusing the index created in #initialize would carry the
          # records added by an earlier #encode call on this instance into
          # this stream's index.
          @index = IndexEncoder.new

          output = StringIO.new
          output.set_encoding(Encoding::BINARY)

          # Read input data; #to_s also covers a reader that returns nil.
          raw = input.respond_to?(:read) ? input.read : input
          input_data = raw.to_s.dup.force_encoding(Encoding::BINARY)

          # 1. Write Stream Header
          header = StreamHeader.new(check_type: @check_type)
          output.write(header.encode)

          # 2. Encode and write Block(s)
          encode_blocks(input_data, output)

          # 3. Write Index
          index_data = @index.encode
          output.write(index_data)

          # 4. Write Stream Footer (its backward size is the index size)
          footer = StreamFooter.new(
            check_type: @check_type,
            backward_size: @index.size,
          )
          output.write(footer.encode)

          output.string
        end

        private

        # Encode +data+ as a single block, write it to +output+ and record
        # its sizes in the index.
        #
        # @param data [String, nil] Binary input data
        # @param output [StringIO] Destination for the encoded block
        def encode_blocks(data, output)
          # XZ Utils behavior: If input is empty, don't create any blocks
          # The stream will consist of just: Stream Header + Index + Stream Footer
          # (nil is tested first so that it never reaches #empty?)
          return if data.nil? || data.empty?

          # For now, encode entire data as single block
          # TODO: Support multi-block encoding for large files

          # Include block sizes for XZ Utils compatibility
          # This ensures that XZ Utils can properly decode the files
          block_encoder = BlockEncoder.new(
            check_type: @check_type,
            dict_size: @dict_size,
            include_block_sizes: true, # Include size fields for compatibility
          )

          block = block_encoder.encode_block(data)

          # Block layout on disk: header, compressed data, padding, check value
          output.write(block[:header])
          output.write(block[:data])
          output.write(block[:padding])
          output.write(block[:check])

          # Add to index
          @index.add_record(
            block_encoder.unpadded_size,
            block_encoder.uncompressed_size,
          )
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "constants"
|
|
4
|
+
require "zlib"
|
|
5
|
+
require_relative "../../error"
|
|
6
|
+
|
|
7
|
+
module Omnizip
  module Formats
    module XzFormat
      # XZ Stream Footer encoder
      # Based on XZ Utils stream_flags_encoder.c
      class StreamFooter
        include Omnizip::Formats::XzConst

        attr_reader :check_type, :backward_size

        # @param backward_size [Integer] Size of the Index in bytes
        # @param check_type Check type written into the stream flags
        #   (defaults to CHECK_CRC64)
        def initialize(backward_size:, check_type: CHECK_CRC64)
          @backward_size = backward_size
          @check_type = check_type
        end

        # Encode stream footer (12 bytes total)
        # Layout, in order:
        # - CRC32 (4 bytes): CRC32 of backward size + stream flags
        # - Backward Size (4 bytes): Size of Index in 4-byte multiples
        # - Stream Flags (2 bytes): version + check type
        # - Footer Magic (2 bytes): 59 5A
        #
        # @return [String] Binary footer
        # @raise [ArgumentError] If the backward size is out of range or not
        #   a multiple of 4
        def encode
          raise ArgumentError, "Invalid backward size: #{@backward_size}" unless valid_backward_size?

          # Stored as (bytes / 4) - 1, little-endian uint32
          size_field = [(@backward_size / 4) - 1].pack("V")
          flags_field = encode_stream_flags

          # The CRC32 covers the backward size and the stream flags.
          crc_field = [Zlib.crc32(size_field + flags_field)].pack("V")

          footer = String.new(encoding: Encoding::BINARY)
          footer << crc_field << size_field << flags_field << FOOTER_MAGIC.pack("C*")
        end

        private

        # Stream Flags format:
        # Byte 0: Reserved (must be 0x00)
        # Byte 1: Check type
        def encode_stream_flags
          [0x00, @check_type].pack("CC")
        end

        # A backward size is valid when it lies within the allowed range and
        # is a multiple of 4.
        def valid_backward_size?
          within_limits = @backward_size.between?(BACKWARD_SIZE_MIN, BACKWARD_SIZE_MAX)
          within_limits && (@backward_size % 4).zero?
        end
      end
    end
  end
end
|