omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "constants"
|
|
24
|
+
|
|
25
|
+
module Omnizip
|
|
26
|
+
module Algorithms
|
|
27
|
+
class LZMA < Algorithm
|
|
28
|
+
# Literal byte encoder
|
|
29
|
+
#
|
|
30
|
+
# This class is responsible for encoding literal bytes using
|
|
31
|
+
# probability models. It supports two modes:
|
|
32
|
+
#
|
|
33
|
+
# 1. Unmatched mode: Simple 8-bit encoding
|
|
34
|
+
# 2. Matched mode: Uses match byte for context (SDK feature)
|
|
35
|
+
#
|
|
36
|
+
# The matched mode improves compression when a literal follows
|
|
37
|
+
# a match, by using the corresponding byte from the match as
|
|
38
|
+
# context for probability modeling.
|
|
39
|
+
#
|
|
40
|
+
# Single Responsibility: Literal byte encoding only
|
|
41
|
+
#
|
|
42
|
+
# @example Unmatched encoding
|
|
43
|
+
# encoder = LiteralEncoder.new
|
|
44
|
+
# encoder.encode_unmatched(byte, pos, prev_byte, lc, literal_mask, range_encoder, models)
|
|
45
|
+
#
|
|
46
|
+
# @example Matched encoding (SDK mode)
|
|
47
|
+
# encoder = LiteralEncoder.new
|
|
48
|
+
# encoder.encode_matched(byte, match_byte, pos, prev_byte, lc, literal_mask, range_encoder, models)
|
|
49
|
+
class LiteralEncoder
|
|
50
|
+
include Constants
|
|
51
|
+
|
|
52
|
+
# Initialize the literal encoder
|
|
53
|
+
#
|
|
54
|
+
# @param lc [Integer] Literal context bits (0-8)
|
|
55
|
+
# Default to 3 for compatibility
|
|
56
|
+
def initialize(lc = 3)
  # Keep the literal-context bit count on the instance. Note that
  # encode_unmatched receives its own lc argument and does not read @lc.
  @lc = lc
end
|
|
59
|
+
|
|
60
|
+
# Encode literal byte in unmatched mode
|
|
61
|
+
#
|
|
62
|
+
# This is the standard LZMA literal encoding where each bit
|
|
63
|
+
# is encoded using probability models based on the partial
|
|
64
|
+
# symbol value.
|
|
65
|
+
#
|
|
66
|
+
# # XZ Utils literal_subcoder macro (from lzma_common.h:141-145):
|
|
67
|
+
# # ((probs) + 3 * (((((pos) << 8) + (prev_byte)) & (literal_mask)) << (lc)))
|
|
68
|
+
#
|
|
69
|
+
# @param byte [Integer] Byte value to encode (0-255)
|
|
70
|
+
# @param pos [Integer] Current position in stream
|
|
71
|
+
# @param prev_byte [Integer] Previous byte value
|
|
72
|
+
# @param lc [Integer] Literal context bits (0-8)
|
|
73
|
+
# @param literal_mask [Integer] Literal mask for context calculation
|
|
74
|
+
# @param range_encoder [RangeEncoder] Range encoder instance
|
|
75
|
+
# @param models [Array<BitModel>] Literal probability models
|
|
76
|
+
# @return [void]
|
|
77
|
+
def encode_unmatched(byte, pos, prev_byte, lc, literal_mask,
                     range_encoder, models)
  # Pick the literal subcoder for this position / previous-byte context,
  # following the XZ Utils literal_subcoder formula:
  #   3 * ((((pos << 8) + prev_byte) & literal_mask) << lc)
  subcoder_base = 3 * ((((pos << 8) + prev_byte) & literal_mask) << lc)

  # Walk the bit tree from the most significant bit (7) down to bit 0.
  # `node` starts at the root (1) and becomes (node << 1) + bit after
  # every encoded bit, so it also selects the next probability model.
  node = 1
  7.downto(0) do |shift|
    bit = (byte >> shift) & 1
    range_encoder.encode_bit(models[subcoder_base + node], bit)
    node = (node << 1) + bit
  end

  nil
end
|
|
100
|
+
|
|
101
|
+
# Encode literal byte in matched mode (SDK feature)
|
|
102
|
+
#
|
|
103
|
+
# This mode uses a byte from the dictionary (the "match byte")
|
|
104
|
+
# as context for encoding the literal. This improves compression
|
|
105
|
+
# when the literal follows a match, as the match byte provides
|
|
106
|
+
# additional predictive information.
|
|
107
|
+
#
|
|
108
|
+
# Direct port from XZ Utils literal_matched() in lzma_encoder.c:22-41
|
|
109
|
+
#
|
|
110
|
+
# @param byte [Integer] Byte value to encode (0-255)
|
|
111
|
+
# @param match_byte [Integer] Corresponding byte from dictionary
|
|
112
|
+
# @param pos [Integer] Current position in stream
|
|
113
|
+
# @param prev_byte [Integer] Previous byte value
|
|
114
|
+
# @param lc [Integer] Literal context bits (0-8)
|
|
115
|
+
# @param literal_mask [Integer] Literal mask for context calculation
|
|
116
|
+
# @param range_encoder [RangeEncoder] Range encoder instance
|
|
117
|
+
# @param models [Array<BitModel>] Literal probability models
|
|
118
|
+
# @return [void]
|
|
119
|
+
def encode_matched(byte, match_byte, pos, prev_byte, lc, literal_mask,
                   range_encoder, models)
  # Same subcoder selection as for unmatched literals:
  #   3 * ((((pos << 8) + prev_byte) & literal_mask) << lc)
  subcoder_base = 3 * ((((pos << 8) + prev_byte) & literal_mask) << lc)

  # Port of literal_matched() from XZ Utils (lzma_encoder.c).
  #
  # match_mask      - 0x100 at the start; the update at the bottom of the
  #                   loop clears it once an encoded bit differs from the
  #                   corresponding match bit (bit 8 of the XOR is set).
  # shifted_match   - match byte, shifted left once per iteration so the
  #                   bit under test sits at bit 8.
  # shifted_symbol  - byte with a marker bit at 0x100; shifting it left
  #                   once per bit makes the loop stop when the marker
  #                   reaches 0x10000.
  match_mask = 0x100
  shifted_match = match_byte
  shifted_symbol = byte + (1 << 8)

  loop do
    shifted_match <<= 1
    match_bit = shifted_match & match_mask

    # Bit to encode is always bit 7 of the (shifted) symbol.
    bit = (shifted_symbol >> 7) & 1
    slot = subcoder_base + match_mask + match_bit + (shifted_symbol >> 8)
    range_encoder.encode_bit(models[slot], bit)

    shifted_symbol <<= 1
    match_mask &= ~(shifted_match ^ shifted_symbol)

    # do { ... } while (symbol < (1 << 16))
    break unless shifted_symbol < (1 << 16)
  end
end
|
|
177
|
+
|
|
178
|
+
private
|
|
179
|
+
|
|
180
|
+
# Encode remaining bits in unmatched mode
|
|
181
|
+
#
|
|
182
|
+
# Called from matched mode when bits diverge.
|
|
183
|
+
# Similar to encode_unmatched but starts with partial symbol.
|
|
184
|
+
#
|
|
185
|
+
# @param byte [Integer] Original byte value
|
|
186
|
+
# @param symbol [Integer] Partial symbol value
|
|
187
|
+
# @param base_offset [Integer] Model base offset
|
|
188
|
+
# @param range_encoder [RangeEncoder] Range encoder instance
|
|
189
|
+
# @param models [Array<BitModel>] Literal probability models
|
|
190
|
+
# @return [void]
|
|
191
|
+
def encode_unmatched_tail(byte, symbol, base_offset, range_encoder,
                          models)
  # symbol is treated as a marker bit followed by the bits already encoded
  # (MSB first), so bit_length - 1 bits are done and the rest remain.
  # A full 8-bit symbol (or larger) leaves nothing to encode.
  remaining_bits = 8 - (symbol.bit_length - 1)

  remaining_bits.times do |i|
    model_index = base_offset + symbol

    # Continue from the first bit that has NOT been encoded yet: with
    # `remaining_bits` bits left, the next one is bit (remaining_bits - 1),
    # then downwards to bit 0. (Starting at bit 7 regardless of the
    # partial symbol would re-encode the leading bits.)
    bit = (byte >> (remaining_bits - 1 - i)) & 1

    range_encoder.encode_bit(models[model_index], bit)
    symbol = (symbol << 1) | bit
  end
end
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
end
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "xz_utils_decoder"
|
|
24
|
+
require_relative "../../checksums/crc32"
|
|
25
|
+
require "stringio"
|
|
26
|
+
|
|
27
|
+
module Omnizip
|
|
28
|
+
module Algorithms
|
|
29
|
+
class LZMA < Algorithm
|
|
30
|
+
#
|
|
31
|
+
# Decoder for .lz (lzip) format
|
|
32
|
+
#
|
|
33
|
+
# This is the lzip format, a DIFFERENT container format from both
|
|
34
|
+
# XZ and .lzma (LZMA_Alone). Lzip was created as an alternative to
|
|
35
|
+
# the legacy .lzma format with better integrity checking.
|
|
36
|
+
#
|
|
37
|
+
# File format:
|
|
38
|
+
# - Magic bytes: "LZIP" (0x4C 0x5A 0x49 0x50)
|
|
39
|
+
# - Version (1 byte): 0 or 1
|
|
40
|
+
# - Dictionary size (1 byte): encoded format
|
|
41
|
+
# - LZMA1 compressed stream (with fixed LC=3, LP=0, PB=2)
|
|
42
|
+
# - Footer:
|
|
43
|
+
# - Version 0 (12 bytes): CRC32 (4) + Uncompressed size (8)
|
|
44
|
+
# - Version 1 (20 bytes): CRC32 (4) + Uncompressed size (8) + Member size (8)
|
|
45
|
+
#
|
|
46
|
+
# Reference: XZ Utils, src/liblzma/common/lzip_decoder.c
|
|
47
|
+
#
|
|
48
|
+
# This decoder uses the same LZMA1 decoding engine as XZ format,
|
|
49
|
+
# but with the lzip container format and CRC32 integrity checking.
|
|
50
|
+
#
|
|
51
|
+
# @example Decode .lz file
|
|
52
|
+
# data = File.binread("file.lz")
|
|
53
|
+
# decoder = Omnizip::Algorithms::LZMA::LzipDecoder.new(StringIO.new(data))
|
|
54
|
+
# result = decoder.decode_stream
|
|
55
|
+
#
|
|
56
|
+
class LzipDecoder
|
|
57
|
+
# Magic bytes that open every lzip member: the ASCII string "LZIP"
# (0x4C 0x5A 0x49 0x50).
# Reference: lzip_decoder.c:106
MAGIC = "LZIP".bytes.freeze

# lzip always uses these fixed LZMA LC/LP/PB parameters.
# Reference: lzip_decoder.c:23-26
LZIP_LC = 3
LZIP_LP = 0
LZIP_PB = 2

# Footer sizes in bytes:
#   version 0: CRC32 (4) + uncompressed size (8)
#   version 1: CRC32 (4) + uncompressed size (8) + member size (8)
# Reference: lzip_decoder.c:19-21
LZIP_V0_FOOTER_SIZE = 4 + 8
LZIP_V1_FOOTER_SIZE = 4 + 8 + 8
LZIP_FOOTER_SIZE_MAX = LZIP_V1_FOOTER_SIZE

# Smallest and largest dictionary sizes accepted, in bytes.
# Reference: lzip_decoder.c:197-198
MIN_DICT_SIZE = 1 << 12 # 4 KiB
MAX_DICT_SIZE = 1 << 29 # 512 MiB
|
|
77
|
+
|
|
78
|
+
# Initialize the decoder with .lz format input
|
|
79
|
+
#
|
|
80
|
+
# @param input [IO] Input stream of .lz compressed data
|
|
81
|
+
# @param options [Hash] Decoding options
|
|
82
|
+
# @option options [Boolean] :ignore_check If true, skip CRC32 verification (default: false)
|
|
83
|
+
# @option options [Boolean] :concatenated If true, decode concatenated .lz members (default: true)
|
|
84
|
+
# @raise [Omnizip::DecompressionError] If header is invalid or unsupported
|
|
85
|
+
def initialize(input, options = {})
|
|
86
|
+
@input = input
|
|
87
|
+
@ignore_check = options.fetch(:ignore_check, false)
|
|
88
|
+
# Concatenated mode is enabled by default (lzip natively supports multiple members)
|
|
89
|
+
@concatenated = options.fetch(:concatenated, true)
|
|
90
|
+
|
|
91
|
+
# Parse .lz header
|
|
92
|
+
parse_header
|
|
93
|
+
|
|
94
|
+
# Track member size (including header and footer)
|
|
95
|
+
# We start with the 6 bytes we've already read (magic + version + dict_size)
|
|
96
|
+
@member_size = 6
|
|
97
|
+
|
|
98
|
+
# For concatenated mode, track if this is the first member
|
|
99
|
+
@first_member = true
|
|
100
|
+
|
|
101
|
+
# Initialize CRC32 calculator
|
|
102
|
+
@crc32 = 0
|
|
103
|
+
@uncompressed_size = 0
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# Decode the .lz stream
|
|
107
|
+
#
|
|
108
|
+
# @param output [IO, nil] Optional output stream
|
|
109
|
+
# @return [String, Integer] Decompressed data or bytes written
|
|
110
|
+
def decode_stream(output = nil)
  # Decodes the current member and, in concatenated mode, every member
  # that follows. Returns the accumulated binary String when no output
  # stream is given, otherwise the total number of bytes written.
  all_decoded_data = String.new(encoding: Encoding::BINARY)
  bytes_written = 0
  result = nil

  loop do
    # The member header has already been consumed by parse_header, so
    # this is the offset of the first compressed byte.
    start_pos = @input.pos

    # lzip uses fixed LC/LP/PB values; the uncompressed size is passed
    # as "unknown" (all ones) so that EOPM is allowed.
    decoder = XzUtilsDecoder.new(@input,
                                 lzma2_mode: true,
                                 lc: LZIP_LC,
                                 lp: LZIP_LP,
                                 pb: LZIP_PB,
                                 dict_size: @dict_size,
                                 uncompressed_size: 0xFFFFFFFFFFFFFFFF)

    # Always decode to a String; the caller's stream (if any) is fed below.
    decoded_data = decoder.decode_stream(nil, check_rc_finished: false)

    # Normalize a nil result to an empty binary string once, so that
    # writing, accumulation and CRC calculation all see a String.
    decoded_data ||= String.new(encoding: Encoding::BINARY)

    if output
      output.write(decoded_data)
      bytes_written += decoded_data.bytesize
      result = bytes_written
    else
      all_decoded_data << decoded_data
      result = all_decoded_data
    end

    # Header (6 bytes) + compressed payload; the footer size is added by
    # whichever branch below consumes the footer.
    @member_size = @input.pos - start_pos + 6

    if @ignore_check
      # Skip the footer without verifying it.
      footer_size = @version.zero? ? LZIP_V0_FOOTER_SIZE : LZIP_V1_FOOTER_SIZE
      @input.read(footer_size)
      @member_size += footer_size
    else
      calculated_crc = Omnizip::Checksums::Crc32.calculate(decoded_data)
      @uncompressed_size = decoded_data.bytesize

      # Reads the footer, adds its size to @member_size and raises on
      # any mismatch.
      verify_footer(calculated_crc)
    end

    break unless @concatenated
    break unless has_next_member?

    # Another member follows: read its header and go around again.
    parse_header
  end

  result
end
|
|
175
|
+
|
|
176
|
+
private
|
|
177
|
+
|
|
178
|
+
# Check if there's another concatenated LZIP member
|
|
179
|
+
# Peeks ahead without consuming the magic bytes
|
|
180
|
+
#
|
|
181
|
+
# @return [Boolean] true if another member is present
|
|
182
|
+
def has_next_member?
  # Look at up to MAGIC.size bytes after the member that was just decoded.
  probe = @input.read(MAGIC.size)

  # nil means end of input: nothing was consumed, nothing to rewind.
  return false if probe.nil?

  # This is a peek: always put back exactly what was read, whether or not
  # it turned out to be the lzip magic. Rewinding on a mismatch (or on a
  # short read) leaves trailing non-lzip data untouched for the caller.
  # ::IO is used explicitly so the constant is the core IO class.
  @input.seek(-probe.bytesize, ::IO::SEEK_CUR)

  # A short read can never equal MAGIC, so this also covers truncation.
  probe.bytes == MAGIC
end
|
|
198
|
+
|
|
199
|
+
# Parse .lz format header
|
|
200
|
+
#
|
|
201
|
+
# Format (from lzip_decoder.c):
|
|
202
|
+
# - Magic bytes: "LZIP" (4 bytes)
|
|
203
|
+
# - Version (1 byte): 0 or 1
|
|
204
|
+
# - Dictionary size (1 byte): encoded format
|
|
205
|
+
#
|
|
206
|
+
# Reference: XZ Utils, src/liblzma/common/lzip_decoder.c
|
|
207
|
+
#
|
|
208
|
+
# @return [void]
|
|
209
|
+
# @raise [Omnizip::DecompressionError] If header is invalid
|
|
210
|
+
def parse_header
  # --- Magic bytes (SEQ_ID_STRING, lzip_decoder.c:104-153) ---
  signature = @input.read(4)
  if signature.nil? || signature.bytesize < 4
    raise Omnizip::DecompressionError, "Incomplete .lz header: missing magic bytes"
  end

  unless signature.bytes == MAGIC
    hex = ->(values) { values.map { |b| "0x#{b.to_s(16).upcase}" }.join(' ') }
    raise Omnizip::DecompressionError, "Invalid .lz header: magic bytes don't match LZIP (expected #{hex.call(MAGIC)}, got #{hex.call(signature.bytes)})"
  end

  # --- Version byte (SEQ_VERSION, lzip_decoder.c:156-174) ---
  version_byte = @input.getbyte
  raise Omnizip::DecompressionError, "Incomplete .lz header: missing version byte" unless version_byte

  # Stored before validation, so @version holds the value read even when
  # it is rejected below. Only versions 0 and 1 are accepted.
  @version = version_byte
  if @version > 1
    raise Omnizip::UnsupportedFormatError, "Unsupported .lz version: #{@version} (only 0 and 1 are supported)"
  end

  # --- Dictionary size byte (SEQ_DICT_SIZE, lzip_decoder.c:177-222) ---
  coded_size = @input.getbyte
  raise Omnizip::DecompressionError, "Incomplete .lz header: missing dictionary size byte" unless coded_size

  # Low five bits: base-2 logarithm of the base size.
  # High three bits: numerator (0..7) of the sixteenths subtracted from it.
  b2log = coded_size & 0x1F
  fracnum = coded_size >> 5

  # Accept only [4 KiB, 512 MiB]: log2 in 12..29, and no fraction may be
  # subtracted from the 4 KiB minimum.
  in_range = b2log.between?(12, 29) && (b2log > 12 || fracnum.zero?)
  unless in_range
    raise Omnizip::DecompressionError, "Invalid .lz header: dictionary size byte 0x#{coded_size.to_s(16).upcase} is out of valid range"
  end

  # 2^b2log - fracnum * 2^(b2log - 4)
  @dict_size = (1 << b2log) - (fracnum << (b2log - 4))

  # Defensive bounds checks on the computed value.
  raise Omnizip::DecompressionError, "Dictionary size calculation error: too small" if @dict_size < MIN_DICT_SIZE
  raise Omnizip::DecompressionError, "Dictionary size calculation error: too large" if @dict_size > MAX_DICT_SIZE
end
|
|
261
|
+
|
|
262
|
+
# Verify .lz format footer
|
|
263
|
+
#
|
|
264
|
+
# Format (from lzip_decoder.c):
|
|
265
|
+
# - CRC32 of uncompressed data (4 bytes, little-endian)
|
|
266
|
+
# - Uncompressed size (8 bytes, little-endian)
|
|
267
|
+
# - Member size (8 bytes, little-endian) - only for version 1
|
|
268
|
+
#
|
|
269
|
+
# Reference: XZ Utils, src/liblzma/common/lzip_decoder.c:277-309
|
|
270
|
+
#
|
|
271
|
+
# @param calculated_crc [Integer] Calculated CRC32 of uncompressed data
|
|
272
|
+
# @raise [Omnizip::DecompressionError] If footer is invalid or checksum mismatch
|
|
273
|
+
def verify_footer(calculated_crc)
  # Version 0 footers have no member-size field, so they are shorter.
  expected_len = @version.zero? ? LZIP_V0_FOOTER_SIZE : LZIP_V1_FOOTER_SIZE
  trailer = @input.read(expected_len)
  if trailer.nil? || trailer.bytesize < expected_len
    raise Omnizip::DecompressionError, "Incomplete .lz footer: expected #{expected_len} bytes, got #{trailer&.bytesize || 0}"
  end

  # The footer counts towards the member size it records.
  @member_size += expected_len

  # Little-endian layout: CRC32 (u32), uncompressed size (u64) and, for
  # version 1 only, member size (u64). For a 12-byte version 0 footer the
  # third value is nil and is never looked at.
  stored_crc, stored_uncompressed_size, stored_member_size = trailer.unpack("VQ<Q<")

  if calculated_crc != stored_crc
    raise Omnizip::ChecksumError, "CRC32 mismatch: calculated 0x#{calculated_crc.to_s(16).upcase}, stored 0x#{stored_crc.to_s(16).upcase}"
  end

  if @uncompressed_size != stored_uncompressed_size
    raise Omnizip::ChecksumError, "Uncompressed size mismatch: decoded #{@uncompressed_size}, stored #{stored_uncompressed_size}"
  end

  # Only version 1 footers carry a member size to compare.
  return unless @version.positive?

  raise Omnizip::ChecksumError, "Member size mismatch: decoded #{@member_size}, stored #{stored_member_size}" if @member_size != stored_member_size
end
|
|
312
|
+
|
|
313
|
+
# Wrapper input stream that tracks bytes read
|
|
314
|
+
class TrackingInputStream
  # Running total of bytes handed out through #read and #getbyte,
  # starting from the offset given to the constructor.
  attr_reader :bytes_read

  # @param input [#read, #getbyte, #eof?] Underlying stream
  # @param start_offset [Integer] Initial value of the byte counter
  def initialize(input, start_offset = 0)
    @input = input
    @bytes_read = start_offset
  end

  # Delegates to the underlying stream and counts the bytes returned.
  # A nil result leaves the counter unchanged.
  def read(size = nil)
    @input.read(size).tap do |chunk|
      @bytes_read += chunk.bytesize if chunk
    end
  end

  # Delegates to the underlying stream; counts one byte unless nil came back.
  def getbyte
    @input.getbyte.tap do |value|
      @bytes_read += 1 if value
    end
  end

  # Whether the underlying stream reports end of input.
  def eof?
    @input.eof?
  end
end
|
|
338
|
+
end
|
|
339
|
+
end
|
|
340
|
+
end
|
|
341
|
+
end
|