omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "stringio"
|
|
4
|
+
require_relative "constants"
|
|
5
|
+
require_relative "../../algorithms/xz_lzma2"
|
|
6
|
+
require_relative "../../checksums/crc64"
|
|
7
|
+
require "zlib"
|
|
8
|
+
|
|
9
|
+
module Omnizip
|
|
10
|
+
module Formats
|
|
11
|
+
module XzFormat
|
|
12
|
+
# XZ Block encoder
|
|
13
|
+
# Based on XZ Utils block_header_encoder.c
|
|
14
|
+
class BlockEncoder
|
|
15
|
+
include Omnizip::Formats::XzConst
|
|
16
|
+
|
|
17
|
+
attr_reader :uncompressed_size, :compressed_size
|
|
18
|
+
|
|
19
|
+
# Create an encoder for a single XZ block.
#
# @param check_type integrity check identifier; stored and later used to
#   pick the size of the check field
# @param dict_size [Integer] dictionary size handed to the LZMA2 encoder
#   (defaults to 8 MiB)
# @param include_block_sizes [Boolean] when true, the block header carries
#   the compressed and uncompressed size fields. Simple single-block
#   streams leave them out; multi-block streams should enable this for
#   seeking support.
def initialize(check_type: CHECK_CRC64, dict_size: 8 * 1024 * 1024, include_block_sizes: false)
  @check_type = check_type
  @dict_size = dict_size
  @include_block_sizes = include_block_sizes

  # Both counters start at zero and are filled in by encode_block.
  @compressed_size = @uncompressed_size = 0
end
|
|
29
|
+
|
|
30
|
+
# Compress +input_data+ with LZMA2 and return the pieces of one XZ block.
#
# As a side effect the byte sizes of the input and of the compressed
# output are stored on the encoder before the header is built.
#
# @param input_data [String] raw bytes to compress
# @return [Hash] with keys :header, :data, :padding (NUL bytes), :check,
#   :compressed_size and :uncompressed_size
def encode_block(input_data)
  @uncompressed_size = input_data.bytesize

  payload = compress_with_lzma2(input_data)
  @compressed_size = payload.bytesize

  # The header is encoded only after both sizes are known.
  block_header = encode_block_header

  # The check is computed over the uncompressed input, not the payload.
  integrity = calculate_check(input_data)

  # Padding is derived from the combined header + payload length.
  pad_length = calculate_padding(block_header.bytesize + payload.bytesize)

  {
    header: block_header,
    data: payload,
    padding: "\x00" * pad_length,
    check: integrity,
    compressed_size: @compressed_size,
    uncompressed_size: @uncompressed_size,
  }
end
|
|
57
|
+
|
|
58
|
+
# Unpadded size of the most recently encoded block, for use in the index.
#
# "Unpadded" leaves out the block padding that follows the compressed
# data, but still counts the header and the check field.
#
# @return [Integer] header size + compressed size + check size
def unpadded_size
  header_bytes = calculate_actual_header_size

  case @check_type
  when CHECK_NONE  then check_bytes = 0
  when CHECK_CRC32 then check_bytes = 4
  when CHECK_CRC64 then check_bytes = 8
  else
    # Any other check type is counted as 8 bytes.
    check_bytes = 8
  end

  header_bytes + @compressed_size + check_bytes
end
|
|
72
|
+
|
|
73
|
+
private
|
|
74
|
+
|
|
75
|
+
# Run +data+ through the XZ Utils style LZMA2 encoder.
#
# @param data [String] raw bytes to compress
# @return whatever the encoder's #encode returns (callers treat it as a
#   byte string)
def compress_with_lzma2(data)
  lzma2_options = {
    dict_size: @dict_size,
    lc: 3,
    lp: 0,
    pb: 2,
    standalone: false, # XZ format (not standalone LZMA2)
  }

  Omnizip::Implementations::XZUtils::LZMA2::Encoder
    .new(**lzma2_options)
    .encode(data)
end
|
|
86
|
+
|
|
87
|
+
# Build the encoded block header.
#
# Layout: [size byte][block flags][compressed size VLI][uncompressed size VLI]
# [filter flags][zero padding][CRC32]. The two size VLIs are written only
# when @include_block_sizes is set. The CRC32 covers every header byte that
# precedes it, including the padding.
#
# @return [String] binary header; its length is a multiple of 4 and at
#   least 12 bytes
def encode_block_header
  # Header fields that sit between the size byte and the padding.
  fields = String.new(encoding: Encoding::BINARY)
  fields << [encode_block_flags].pack("C")
  fields << encode_vli(@compressed_size) if @include_block_sizes
  fields << encode_vli(@uncompressed_size) if @include_block_sizes
  fields << encode_filter_flags

  # When size fields are present, short field data is zero-extended to
  # 15 bytes, which yields a 20-byte header.
  if @include_block_sizes && fields.bytesize < 15
    fields << ("\x00" * (15 - fields.bytesize))
  end

  # Total = size byte + fields + padding + CRC32, rounded up to a multiple
  # of 4 and never smaller than 12 bytes.
  total_size = [((1 + fields.bytesize + 4 + 3) / 4) * 4, 12].max
  pad_length = total_size - 1 - fields.bytesize - 4

  # The size byte stores (total / 4) - 1.
  header = [(total_size / 4) - 1].pack("C")
  header << fields
  header << ("\x00" * pad_length)

  # CRC32 over size byte + fields + padding, stored little-endian.
  header << [Zlib.crc32(header)].pack("V")
  header
end
|
|
170
|
+
|
|
171
|
+
# Compute the Block Flags byte.
#
# Bits 0-1 hold (number of filters - 1); a single filter is used, so they
# stay 0. Bit 6 (0x40) marks the Compressed Size field as present and
# bit 7 (0x80) the Uncompressed Size field; both are set together, and only
# when @include_block_sizes is truthy.
#
# @return [Integer] 0xC0 when size fields are included, otherwise 0x00
def encode_block_flags
  filter_count_bits = 0x00 # (1 filter) - 1
  size_bits = @include_block_sizes ? (0x40 | 0x80) : 0x00

  filter_count_bits | size_bits
end
|
|
194
|
+
|
|
195
|
+
# Encode the Filter Flags for the single LZMA2 filter: the filter ID as a
# VLI, the properties size (1) as a VLI, then one properties byte obtained
# from Algorithms::LZMA2.encode_dict_size(@dict_size).
#
# @return [String] binary filter flags
def encode_filter_flags
  filter_flags = String.new(encoding: Encoding::BINARY)

  filter_flags << encode_vli(FILTER_LZMA2) # filter ID
  filter_flags << encode_vli(1)            # size of properties
  filter_flags << [Algorithms::LZMA2.encode_dict_size(@dict_size)].pack("C")

  filter_flags
end
|
|
211
|
+
|
|
212
|
+
# Encode a non-negative integer as an XZ variable-length integer (VLI).
#
# Each output byte carries 7 bits of the value, least-significant group
# first; bit 7 is set on every byte except the last. A 63-bit value
# therefore needs at most 9 bytes.
#
# @param value [Integer] value in 0..(2**63 - 1)
# @return [String] binary string of 1 to 9 bytes
# @raise [ArgumentError] if +value+ is not an Integer in that range
#   (a negative value would otherwise never shift down to zero and the
#   loop would not terminate)
def encode_vli(value)
  encodable = value.is_a?(Integer) && value.between?(0, (1 << 63) - 1)
  unless encodable
    raise ArgumentError,
          "VLI value must be an Integer in 0..2**63-1, got #{value.inspect}"
  end

  bytes = []
  while value >= 0x80
    bytes << ((value & 0x7F) | 0x80) # continuation bit: more bytes follow
    value >>= 7
  end
  bytes << value # final byte, continuation bit clear

  bytes.pack("C*")
end
|
|
230
|
+
|
|
231
|
+
# Fixed upper estimate of the block header size, intended for
# pre-allocation. The exact size is computed when the header is encoded.
#
# @return [Integer] always 32
def calculate_header_size
  32
end
|
|
236
|
+
|
|
237
|
+
# Number of padding bytes needed to bring +size+ up to the next multiple
# of 4.
#
# @param size [Integer] byte count to align
# @return [Integer] 0..3
def calculate_padding(size)
  (4 - (size % 4)) % 4
end
|
|
242
|
+
|
|
243
|
+
# Compute the integrity check bytes for +data+ according to @check_type.
#
# @param data [String] uncompressed block data
# @return [String] "" for CHECK_NONE, 4 little-endian bytes for
#   CHECK_CRC32, 8 little-endian bytes for CHECK_CRC64
# @raise [Omnizip::FormatError] for any other check type
def calculate_check(data)
  case @check_type
  when CHECK_NONE then ""
  when CHECK_CRC32 then [Zlib.crc32(data)].pack("V")
  when CHECK_CRC64 then [Omnizip::Checksums::Crc64.calculate(data)].pack("Q<")
  else raise Omnizip::FormatError, "Unsupported check type: #{@check_type}"
  end
end
|
|
258
|
+
|
|
259
|
+
# Size in bytes of the header that encode_block_header produces for the
# current state. Uses the same arithmetic: field bytes (zero-extended to 15
# when size fields are included), plus the size byte and CRC32, rounded up
# to a multiple of 4 with a 12-byte minimum.
#
# @return [Integer] total block header size in bytes
def calculate_actual_header_size
  # Block Flags byte
  field_bytes = [encode_block_flags].pack("C").bytesize

  # Optional Compressed Size and Uncompressed Size VLIs
  field_bytes += encode_vli(@compressed_size).bytesize if @include_block_sizes
  field_bytes += encode_vli(@uncompressed_size).bytesize if @include_block_sizes

  # Filter flags follow the size fields
  field_bytes += encode_filter_flags.bytesize

  # Short field data is extended to 15 bytes when size fields are present.
  field_bytes = 15 if @include_block_sizes && field_bytes < 15

  [((1 + field_bytes + 4 + 3) / 4) * 4, 12].max
end
|
|
303
|
+
end
|
|
304
|
+
end
|
|
305
|
+
end
|
|
306
|
+
end
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
  module Formats
    module Xz
      # XZ block header
      #
      # Block header contains:
      # - Header size (1 byte), stored as (size_in_bytes / 4) - 1
      # - Block flags (1 byte)
      # - Compressed size (variable, optional)
      # - Uncompressed size (variable, optional)
      # - Filter flags (variable)
      # - Padding to 4-byte boundary
      # - CRC32 (4 bytes) over everything before it
      class BlockHeader
        # Filter IDs
        FILTER_LZMA2 = 0x21

        # A multibyte integer carries 7 bits per byte and at most 63 bits.
        MAX_MULTIBYTE_INTEGER_BYTES = 9

        # The size byte is a single octet storing (size / 4) - 1.
        MAX_HEADER_SIZE = 1024

        attr_reader :compressed_size, :uncompressed_size, :filters

        # Initialize block header
        #
        # @param options [Hash] Header options
        # @option options [Integer, nil] :compressed_size written when present
        # @option options [Integer, nil] :uncompressed_size written when present
        # @option options [Array<Hash>] :filters filter specs ({ id:, properties: });
        #   defaults to a single LZMA2 filter without properties
        def initialize(options = {})
          @compressed_size = options[:compressed_size]
          @uncompressed_size = options[:uncompressed_size]
          @filters = options[:filters] || [{ id: FILTER_LZMA2 }]
        end

        # Encode block header to bytes
        #
        # @return [String] Encoded header (binary, length is a multiple of 4)
        # @raise [ArgumentError] if the filter count is outside 1..4 or the
        #   header would exceed 1024 bytes
        def encode
          # Bits 0-1 of the flags store (count - 1), so only 1..4 filters fit.
          unless (1..4).cover?(@filters.size)
            raise ArgumentError,
                  "XZ block header requires 1 to 4 filters, got #{@filters.size}"
          end

          flags = @filters.size - 1
          flags |= 0x40 if @compressed_size
          flags |= 0x80 if @uncompressed_size

          data = [flags].pack("C")
          data << encode_multibyte_integer(@compressed_size) if @compressed_size
          data << encode_multibyte_integer(@uncompressed_size) if @uncompressed_size
          @filters.each { |filter| data << encode_filter(filter) }

          # Total size = size byte + fields + padding + CRC32, rounded up to
          # a multiple of 4.
          header_size_bytes = ((1 + data.bytesize + 4 + 3) / 4) * 4
          if header_size_bytes > MAX_HEADER_SIZE
            raise ArgumentError,
                  "XZ block header too large: #{header_size_bytes} bytes"
          end

          padding_size = header_size_bytes - 1 - data.bytesize - 4
          data << ("\0" * padding_size) if padding_size.positive?

          # The size byte stores (real size / 4) - 1, not the raw word count.
          full_header = [(header_size_bytes / 4) - 1].pack("C") + data

          full_header + [Zlib.crc32(full_header)].pack("V")
        end

        # Decode block header from stream
        #
        # @param io [IO] Input stream
        # @return [BlockHeader, nil] Decoded header; nil at end of stream or
        #   when the first byte is 0x00 (not a block header size)
        # @raise [Error] if the header is truncated or its CRC32 does not match
        def self.decode(io)
          # read(1) is nil at EOF, so guard before unpacking.
          size_byte = io.read(1)&.unpack1("C")
          return nil if size_byte.nil? || size_byte.zero?

          # Inverse of the encoding above: real size = (stored + 1) * 4.
          header_size_bytes = (size_byte + 1) * 4

          # Read rest of header (excluding size byte and CRC32)
          header_data_size = header_size_bytes - 1 - 4
          header_data = io.read(header_data_size)
          crc32_bytes = io.read(4)

          truncated = header_data.nil? || header_data.bytesize < header_data_size ||
                      crc32_bytes.nil? || crc32_bytes.bytesize < 4
          raise Error, "XZ block header truncated" if truncated

          # Verify CRC32 over size byte + fields + padding
          full_header = [size_byte].pack("C") + header_data
          unless crc32_bytes.unpack1("V") == Zlib.crc32(full_header)
            raise Error, "XZ block header CRC32 mismatch"
          end

          # Parse header data
          flags = header_data.unpack1("C")
          offset = 1
          options = {}

          # Read compressed size if present
          if flags.anybits?(0x40)
            options[:compressed_size], bytes_read =
              decode_multibyte_integer(header_data[offset..])
            offset += bytes_read
          end

          # Read uncompressed size if present
          if flags.anybits?(0x80)
            options[:uncompressed_size], bytes_read =
              decode_multibyte_integer(header_data[offset..])
            offset += bytes_read
          end

          # Parse filters; bits 0-1 store (count - 1)
          options[:filters] = []
          ((flags & 0x03) + 1).times do
            filter, bytes_read = decode_filter(header_data[offset..])
            options[:filters] << filter
            offset += bytes_read
          end

          new(options)
        end

        # Decode multibyte integer
        #
        # Defined with `def self.`, so the `private` keyword below never
        # applied to it; it stays public.
        #
        # @param data [String, nil] Data to decode
        # @return [Array<Integer, Integer>] Value and bytes consumed
        # @raise [Error] if the data ends before the integer does or the
        #   integer is longer than 9 bytes
        def self.decode_multibyte_integer(data)
          value = 0
          consumed = 0

          loop do
            byte = data&.getbyte(consumed)
            if byte.nil?
              raise Error, "XZ block header: truncated multibyte integer"
            end
            if consumed >= MAX_MULTIBYTE_INTEGER_BYTES
              raise Error, "XZ block header: multibyte integer too long"
            end

            value |= (byte & 0x7F) << (7 * consumed)
            consumed += 1
            break if byte.nobits?(0x80)
          end

          [value, consumed]
        end

        # Decode filter
        #
        # Defined with `def self.`, so it stays public (see above).
        #
        # @param data [String, nil] Data to decode
        # @return [Array<Hash, Integer>] Filter and bytes consumed
        # @raise [Error] if the filter data is truncated
        def self.decode_filter(data)
          filter_id, offset = decode_multibyte_integer(data)

          props_size, bytes_read = decode_multibyte_integer(data[offset..])
          offset += bytes_read

          props = data.byteslice(offset, props_size)
          if props.nil? || props.bytesize < props_size
            raise Error, "XZ block header: truncated filter properties"
          end
          offset += props_size

          filter = { id: filter_id }
          filter[:properties] = props unless props.empty?

          [filter, offset]
        end

        private

        # Encode multibyte integer (VLI - Variable Length Integer)
        #
        # @param value [Integer] Value to encode
        # @return [String] Encoded bytes
        def encode_multibyte_integer(value)
          bytes = []
          loop do
            byte = value & 0x7F
            value >>= 7
            byte |= 0x80 if value.positive?
            bytes << byte
            break if value.zero?
          end
          bytes.pack("C*")
        end

        # Encode filter
        #
        # @param filter [Hash] Filter specification
        # @return [String] Encoded filter
        def encode_filter(filter)
          filter_id = filter[:id] || FILTER_LZMA2
          # Force binary so appending to the binary header cannot raise an
          # encoding compatibility error.
          props = (filter[:properties] || "").b

          encode_multibyte_integer(filter_id) +
            encode_multibyte_integer(props.bytesize) +
            props
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require "stringio"
|
|
24
|
+
require "zlib"
|
|
25
|
+
require_relative "constants"
|
|
26
|
+
require_relative "vli"
|
|
27
|
+
require_relative "../../error"
|
|
28
|
+
require_relative "../../checksums/verifier"
|
|
29
|
+
|
|
30
|
+
module Omnizip
  module Formats
    module XzFormat
      # XZ Block Header parser
      #
      # Block Header format:
      # - Block Header Size: (size_in_4byte_units - 1) encoded as 1 byte
      # - Block Flags: 1 byte (bit 7=uncompressed_size_present,
      #   bit 6=compressed_size_present, bits 2-5 reserved,
      #   bits 0-1=num_filters - 1)
      # - Compressed Size: VLI (if flag bit 6 is set)
      # - Uncompressed Size: VLI (if flag bit 7 is set)
      # - Filters: for each filter: id (VLI) + props_size (VLI) +
      #   properties (props_size bytes)
      # - Padding: zero bytes up to the 4-byte boundary
      # - CRC32: 4 bytes, over header + padding
      #
      # Reference: XZ Utils, src/liblzma/common/block_header_decoder.c
      class BlockHeaderParser
        # Filter IDs (from XZ spec)
        FILTER_LZMA2 = 0x21
        FILTER_DELTA = 0x03
        FILTER_BCJ_X86 = 0x04
        FILTER_BCJ_POWERPC = 0x05
        FILTER_BCJ_IA64 = 0x06
        FILTER_BCJ_ARM = 0x07
        FILTER_BCJ_ARMTHUMB = 0x08
        FILTER_BCJ_SPARC = 0x09

        # Block Flags bits 2-5 are reserved and must be zero.
        RESERVED_FLAGS_MASK = 0x3C

        # Parse block header from input stream
        #
        # @param input [IO] Input stream positioned at block header start
        # @return [Hash] Parsed header data with keys:
        #   - compressed_size: Integer or nil
        #   - uncompressed_size: Integer or nil
        #   - filters: Array of {id: Integer, properties: String or nil}
        #   - header_size: Integer (total header size in bytes)
        # @raise [FormatError] If the header is truncated or malformed, the
        #   CRC does not match, a reserved flag bit is set, a filter ID is
        #   rejected, or the padding contains a non-zero byte
        def self.parse(input)
          size_byte = input.getbyte
          if size_byte.nil?
            raise FormatError,
                  "Unexpected end of stream in block header"
          end

          # Stored as (size / 4) - 1, so actual size = (stored + 1) * 4.
          # Matches lzma_block_header_size_decode() in
          # XZ Utils src/liblzma/api/lzma/block.h.
          header_size = (size_byte + 1) * 4
          if header_size < 8 || header_size > 1024
            raise FormatError, "Invalid block header size: #{header_size}"
          end

          fields = StringIO.new(read_verified_fields(input, size_byte, header_size))

          block_flags = fields.getbyte
          if block_flags.nil?
            raise FormatError,
                  "Unexpected end of block header flags"
          end
          if block_flags.anybits?(RESERVED_FLAGS_MASK)
            raise FormatError,
                  format("Reserved block flag bits set: 0x%02X", block_flags)
          end

          # Optional size fields, in this order; each stays nil when absent.
          compressed_size = VLI.decode(fields) if block_flags.anybits?(0x40)
          uncompressed_size = VLI.decode(fields) if block_flags.anybits?(0x80)

          # Number of filters is encoded as (num_filters - 1) in bits 0-1.
          filters = Array.new((block_flags & 0x03) + 1) { parse_filter(fields) }

          verify_padding(fields)

          {
            compressed_size: compressed_size,
            uncompressed_size: uncompressed_size,
            filters: filters,
            header_size: header_size,
          }
        end

        # Read the rest of the header, verify its CRC32 and return the bytes
        # between the size byte and the CRC32 (header fields plus padding).
        #
        # @param input [IO] stream positioned just after the size byte
        # @param size_byte [Integer] the size byte already consumed
        # @param header_size [Integer] decoded total header size in bytes
        # @return [String] header fields and padding
        # @raise [FormatError] on truncation or CRC mismatch
        def self.read_verified_fields(input, size_byte, header_size)
          remaining_size = header_size - 1
          header_data = input.read(remaining_size)
          if header_data.nil? || header_data.bytesize < remaining_size
            raise FormatError, "Unexpected end of stream in block header data"
          end

          # The CRC covers the size byte too, so rebuild the full header.
          full_header = [size_byte].pack("C") + header_data
          crc_offset = header_size - 4
          stored_crc = full_header[crc_offset..].unpack1("V")
          crc_data = full_header[0...crc_offset]
          actual_crc = Zlib.crc32(crc_data)

          if actual_crc != stored_crc
            raise FormatError,
                  "Block header CRC mismatch: expected #{stored_crc}, got #{actual_crc}"
          end

          crc_data[1..]
        end
        private_class_method :read_verified_fields

        # Parse one filter entry (ID, properties size, properties).
        #
        # @param fields [StringIO] header fields positioned at a filter entry
        # @return [Hash] { id: Integer, properties: String or nil }
        # @raise [FormatError] if the ID is rejected or the entry is truncated
        def self.parse_filter(fields)
          filter_id = VLI.decode(fields)

          # Accepted low IDs are 0x03 (Delta) through 0x0B and 0x21 (LZMA2);
          # IDs below 0x03 and between 0x0C and 0x20 are rejected.
          # NOTE(review): IDs from 0x22 up to the reserved range pass this
          # check unvalidated - confirm that is intended.
          if filter_id < 0x03 || (filter_id > 0x0B && filter_id < 0x21)
            raise FormatError,
                  "Unsupported or invalid filter ID: 0x#{filter_id.to_s(16).upcase}"
          end

          # Reserved custom filter range (>= 0x4000000000000000) is invalid
          if filter_id >= 0x4000_0000_0000_0000
            raise FormatError,
                  "Invalid reserved custom filter ID: 0x#{filter_id.to_s(16).upcase}"
          end

          # Size of Properties is a VLI, like the filter ID.
          props_size = VLI.decode(fields)
          if props_size.nil?
            raise FormatError,
                  "Unexpected end of stream in filter props size"
          end

          properties = nil
          if props_size.positive?
            properties = fields.read(props_size)
            if properties.nil? || properties.bytesize < props_size
              raise FormatError,
                    "Unexpected end of stream in filter properties"
            end
          end

          { id: filter_id, properties: properties }
        end
        private_class_method :parse_filter

        # Ensure every byte left after the last filter is zero padding.
        #
        # @param fields [StringIO] header fields positioned after the filters
        # @return [void]
        # @raise [FormatError] if any remaining byte is non-zero
        def self.verify_padding(fields)
          leftover = fields.read
          return if leftover.nil? || leftover.each_byte.all?(&:zero?)

          raise FormatError, "Non-zero byte in block header padding"
        end
        private_class_method :verify_padding
      end
    end
  end
end
|