omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../../../algorithms/lzma/state"
|
|
24
|
+
|
|
25
|
+
module Omnizip
|
|
26
|
+
module Implementations
|
|
27
|
+
module SevenZip
|
|
28
|
+
module LZMA
|
|
29
|
+
# 7-Zip LZMA SDK state machine implementation.
|
|
30
|
+
#
|
|
31
|
+
# This is the original SdkStateMachine moved from algorithms/lzma/sdk_state_machine.rb
|
|
32
|
+
# to the new namespace structure.
|
|
33
|
+
#
|
|
34
|
+
# Ported from 7-Zip LZMA SDK by Igor Pavlov.
|
|
35
|
+
class StateMachine < Omnizip::Algorithms::LZMA::State
|
|
36
|
+
# State categories (SDK classification)
|
|
37
|
+
CATEGORY_LITERAL = :literal # States 0-6
|
|
38
|
+
CATEGORY_MATCH = :match # States 7, 10
|
|
39
|
+
CATEGORY_REP = :rep # State 8, 11
|
|
40
|
+
CATEGORY_SHORT_REP = :short_rep # State 9 (#category reports state 11 as :rep)
|
|
41
|
+
|
|
42
|
+
# Check if current state is a character state
|
|
43
|
+
#
|
|
44
|
+
# Character states (0-6) occur after literal encoding.
|
|
45
|
+
# The SDK uses this to determine probability model selection.
|
|
46
|
+
# This is SDK's IsCharState() macro.
|
|
47
|
+
#
|
|
48
|
+
# @return [Boolean] True if state < 7
|
|
49
|
+
def is_char_state?
|
|
50
|
+
@index < 7
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Get state value (alias for index)
|
|
54
|
+
#
|
|
55
|
+
# @return [Integer] Current state index
|
|
56
|
+
def value
|
|
57
|
+
@index
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Get literal state index for probability model selection
|
|
61
|
+
#
|
|
62
|
+
# The SDK uses a simplified state value for literal encoding:
|
|
63
|
+
# - States 0-3 map to themselves (0-3)
|
|
64
|
+
# - States 4-6 map to state - 3 (1-3)
|
|
65
|
+
# - States 7-9 map to state - 3 (4-6); states 10-11 map to state - 6 (4-5)
|
|
66
|
+
#
|
|
67
|
+
# This yields 7 possible literal contexts (0-6) from 12 states.
|
|
68
|
+
# From LzmaEnc.c: litState = (state < 4) ? state : (state - (state < 10 ? 3 : 6))
|
|
69
|
+
#
|
|
70
|
+
# @return [Integer] Literal state index (0-6)
|
|
71
|
+
def literal_state
|
|
72
|
+
if @index < 4
|
|
73
|
+
@index
|
|
74
|
+
elsif @index < 10
|
|
75
|
+
@index - 3
|
|
76
|
+
else
|
|
77
|
+
@index - 6
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Check if matched literal mode should be used
|
|
82
|
+
#
|
|
83
|
+
# XZ Utils logic (lzma_decoder.c, lzma_common.h):
|
|
84
|
+
# - if (is_literal_state(state)) → use UNMATCHED literal
|
|
85
|
+
# - else → use MATCHED literal
|
|
86
|
+
# - is_literal_state(state) = (state < LIT_STATES) where LIT_STATES = 7
|
|
87
|
+
# - States 0-6: literal states (unmatched)
|
|
88
|
+
# - States 7-11: non-literal states (matched after rep/match)
|
|
89
|
+
#
|
|
90
|
+
# @return [Boolean] True if state >= 7 (non-literal state)
|
|
91
|
+
def use_matched_literal?
|
|
92
|
+
@index >= 7
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# Get state category
|
|
96
|
+
#
|
|
97
|
+
# Categorizes states for debugging and encoder logic.
|
|
98
|
+
# The SDK doesn't expose this directly but uses state ranges
|
|
99
|
+
# in various encoding decisions.
|
|
100
|
+
#
|
|
101
|
+
# @return [Symbol] State category
|
|
102
|
+
def category
  # Each index range maps to one CATEGORY_* constant; anything outside
  # 0..11 is rejected with a RuntimeError.
  case @index
  when 0..6 then CATEGORY_LITERAL
  when 7, 10 then CATEGORY_MATCH
  when 8, 11 then CATEGORY_REP
  when 9 then CATEGORY_SHORT_REP
  else raise "Invalid state: #{@index}"
  end
end
|
|
116
|
+
|
|
117
|
+
# Create a copy of this state
|
|
118
|
+
#
|
|
119
|
+
# Overrides parent to return StateMachine instance
|
|
120
|
+
#
|
|
121
|
+
# @return [StateMachine] A new StateMachine with the same index
|
|
122
|
+
def dup
  # Construct the copy explicitly so the result is a StateMachine that
  # carries the same index as the receiver.
  copy = StateMachine.new(@index)
  copy
end
|
|
125
|
+
|
|
126
|
+
# Check if state would use matched literal after match
|
|
127
|
+
#
|
|
128
|
+
# Helper method for encoder to determine encoding path.
|
|
129
|
+
# Checks if encoding a match NOW would result in matched literal NEXT.
|
|
130
|
+
#
|
|
131
|
+
# @return [Boolean] True if state would transition to matched literal state
|
|
132
|
+
def would_use_matched_literal?
  # MATCH_STATES is indexed by the current state and yields the state
  # reached after a match; 7 and above select matched-literal coding.
  MATCH_STATES[@index] >= 7
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
end
|
|
@@ -0,0 +1,519 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require "stringio"
|
|
24
|
+
require_relative "../../base/lzma2_encoder_base"
|
|
25
|
+
require_relative "../../../algorithms/lzma2/constants"
|
|
26
|
+
require_relative "../../../algorithms/lzma2/lzma2_chunk"
|
|
27
|
+
|
|
28
|
+
module Omnizip
|
|
29
|
+
module Implementations
|
|
30
|
+
module SevenZip
|
|
31
|
+
module LZMA2
|
|
32
|
+
# 7-Zip SDK LZMA2 encoder implementation.
|
|
33
|
+
#
|
|
34
|
+
# This encoder produces LZMA2 compressed data compatible with 7-Zip format.
|
|
35
|
+
# It uses the same LZMA encoding logic as XZ Utils, but with 7-Zip
|
|
36
|
+
# format requirements (no EOS marker, no padding).
|
|
37
|
+
#
|
|
38
|
+
# Key differences from XZ Utils implementation:
|
|
39
|
+
# - No EOS marker (raw LZMA2 data ends with 0x00 control byte)
|
|
40
|
+
# - No chunk padding (XZ pads to 4-byte boundary)
|
|
41
|
+
# - No LZMA2 property byte in data stream (method ID only in container)
|
|
42
|
+
#
|
|
43
|
+
# Based on LZMA SDK by Igor Pavlov
|
|
44
|
+
# Reference: https://www.7-zip.org/sdk.html
|
|
45
|
+
#
|
|
46
|
+
# LZMA2 format (as used by 7-Zip):
|
|
47
|
+
# - Control byte specifies chunk type and dictionary reset
|
|
48
|
+
# - Dictionary size follows in some chunk types
|
|
49
|
+
# - Uncompressed size follows in some chunk types
|
|
50
|
+
# - Compressed data follows
|
|
51
|
+
class Encoder < Base::LZMA2EncoderBase
|
|
52
|
+
include Omnizip::Algorithms::LZMA2Const
|
|
53
|
+
|
|
54
|
+
# Maximum chunk sizes (from LZMA2 specification)
|
|
55
|
+
MAX_UNCOMPRESSED_CHUNK = 2 * 1024 * 1024 # 2MB
|
|
56
|
+
MAX_COMPRESSED_CHUNK = 64 * 1024 # 64KB
|
|
57
|
+
|
|
58
|
+
# Encoding constants
|
|
59
|
+
UINT32_MAX = 0xFFFFFFFF
|
|
60
|
+
REPS = 4
|
|
61
|
+
MATCH_LEN_MIN = 2
|
|
62
|
+
|
|
63
|
+
attr_reader :dict_size, :lc, :lp, :pb, :standalone
|
|
64
|
+
|
|
65
|
+
# Initialize 7-Zip SDK LZMA2 encoder
|
|
66
|
+
#
|
|
67
|
+
# @param dict_size [Integer] Dictionary size (must be power of 2)
|
|
68
|
+
# @param lc [Integer] Literal context bits (0-8)
|
|
69
|
+
# @param lp [Integer] Literal position bits (0-4)
|
|
70
|
+
# @param pb [Integer] Position bits (0-4)
|
|
71
|
+
# @param standalone [Boolean] Include property byte (false for 7-Zip)
|
|
72
|
+
def initialize(dict_size:, lc: 3, lp: 0, pb: 2, standalone: false)
  super

  # The LZMA building blocks are loaded here, at construction time,
  # rather than at file load.
  require_relative "../../../algorithms/lzma/dictionary"
  require_relative "../../../algorithms/lzma/lzma_state"
  require_relative "../../../algorithms/lzma/xz_probability_models"
  require_relative "../../../algorithms/lzma/match_finder"
  require_relative "../../../algorithms/lzma/optimal_encoder"
  require_relative "../../../algorithms/lzma/xz_range_encoder_exact"

  lzma = Omnizip::Algorithms::LZMA

  # Encoder state that is shared by every chunk of a stream.
  @dictionary = lzma::Dictionary.new(dict_size)
  @state = lzma::LZMAState.new(0)
  @models = lzma::XzProbabilityModels.new(lc, lp, pb)
  @match_finder = lzma::MatchFinder.new(@dictionary)
  @optimal = lzma::OptimalEncoder.new(mode: :fast)

  # Last byte emitted so far; feeds the literal context.
  @prev_byte = 0

  # Flags describing what the first chunk header has to announce.
  @need_dictionary_reset = true
  @need_state_reset = false
  @need_properties = true
end
|
|
98
|
+
|
|
99
|
+
# Encode data with LZMA2 compression
|
|
100
|
+
#
|
|
101
|
+
# @param data [String] Input data to compress
|
|
102
|
+
# @return [String] LZMA2 compressed data (7-Zip format)
|
|
103
|
+
def encode(data)
  # Empty input produces an empty result (no property byte, no end marker).
  return "" if data.empty?

  output = StringIO.new
  output.set_encoding(Encoding::BINARY)

  # Standalone streams start with the dictionary-size property byte.
  output.putc(encode_dict_size(@dict_size)) if @standalone

  # Each call starts with a fresh match finder.
  @match_finder.reset

  # Walk the input in MAX_UNCOMPRESSED_CHUNK-sized binary slices. The
  # slices are copies (String#b), so the caller's string is never touched;
  # wrapping `data` in a StringIO and calling set_encoding on it can re-tag
  # the caller's string as binary in place.
  offset = 0
  total = data.bytesize
  while offset < total
    chunk_data = data.byteslice(offset, MAX_UNCOMPRESSED_CHUNK).b
    offset += chunk_data.bytesize

    output.write(encode_chunk(chunk_data))

    # Only the first chunk announces a dictionary reset / properties.
    # NOTE(review): encode_chunk sets @need_state_reset = true after an
    # uncompressed chunk, and the assignment below clears it again before
    # the next chunk is encoded - confirm that this is intended.
    @need_dictionary_reset = false
    @need_state_reset = false
    @need_properties = false
  end

  # Terminate the stream with the end chunk.
  output.write(Omnizip::Algorithms::LZMA2::LZMA2Chunk.end_chunk.to_bytes)

  output.string
end
|
|
140
|
+
|
|
141
|
+
# Get implementation identifier
|
|
142
|
+
#
|
|
143
|
+
# @return [Symbol] :seven_zip_sdk
|
|
144
|
+
def implementation_name
  # Fixed identifier for this encoder implementation.
  :seven_zip_sdk
end
|
|
147
|
+
|
|
148
|
+
private
|
|
149
|
+
|
|
150
|
+
# Encode a single chunk with LZMA2 compression
|
|
151
|
+
#
|
|
152
|
+
# Uses XZ Utils encoding logic (tested and compatible)
|
|
153
|
+
def encode_chunk(uncompressed_data)
  compressed = try_compress(uncompressed_data)
  chunk_class = Omnizip::Algorithms::LZMA2::LZMA2Chunk

  # NOTE(review): LZMA2 chunk headers carry 16-bit size fields, while the
  # input slice can be far larger; presumably LZMA2Chunk copes with that -
  # confirm against its implementation.
  chunk =
    if compressed.bytesize < uncompressed_data.bytesize
      # Compression paid off: emit an LZMA chunk with its properties byte.
      chunk_class.new(
        chunk_type: :compressed,
        uncompressed_data: uncompressed_data,
        compressed_data: compressed,
        compressed_size: compressed.bytesize,
        properties: (((@pb * 5) + @lp) * 9) + @lc,
        need_dict_reset: @need_dictionary_reset,
        need_state_reset: @need_state_reset,
        need_props: true,
      )
    else
      # No gain: store the bytes verbatim and flag a state reset.
      stored = chunk_class.new(
        chunk_type: :uncompressed,
        uncompressed_data: uncompressed_data,
        compressed_data: "",
        need_dict_reset: @need_dictionary_reset,
        need_state_reset: false,
        need_props: false,
      )
      @need_state_reset = true
      stored
    end

  # The dictionary always receives the plain bytes of the chunk.
  @dictionary.append(uncompressed_data)

  # Remember the final byte as literal context for whatever follows.
  @prev_byte = uncompressed_data.getbyte(-1) unless uncompressed_data.bytesize.zero?

  chunk.to_bytes
end
|
|
195
|
+
|
|
196
|
+
# Try to compress data using LZMA
|
|
197
|
+
#
|
|
198
|
+
# Uses XZ Utils encoding components (tested and working)
|
|
199
|
+
def try_compress(data)
  # Range-coded bytes are collected in a binary in-memory buffer.
  sink = StringIO.new
  sink.set_encoding(Encoding::BINARY)
  encoder = Omnizip::Algorithms::LZMA::XzRangeEncoder.new(sink)

  # Hand the whole chunk to the match finder, then let it index up to
  # (dictionary bytes + chunk bytes - 2), never below zero.
  @match_finder.feed(data)
  min_tail = 2
  index_limit = [@dictionary.buffer.bytesize + data.bytesize - min_tail, 0].max
  @match_finder.skip(index_limit)

  # Offset of this chunk inside the match finder's buffer.
  base = @dictionary.buffer.bytesize
  @current_start_pos = base

  cursor = 0
  while cursor < data.bytesize
    # Drain the symbol queue once more than 20 symbols are pending.
    encode_queued_symbols(encoder, sink) if encoder.count > 20

    absolute = base + cursor
    @match_finder.find_matches(absolute)

    distance, length = @optimal.find_optimal(
      absolute,
      @match_finder,
      @state,
      @state.reps,
      @models,
    )

    # Emit the chosen symbol and work out how many input bytes it covers.
    consumed =
      if distance == UINT32_MAX || length == 1
        encode_literal(data.getbyte(cursor), encoder, cursor)
        1
      elsif distance < REPS
        encode_repeated_match(distance, length, encoder, cursor, absolute)
        length
      else
        encode_match(distance - REPS, length, encoder, cursor, absolute, data)
        length
      end
    cursor += consumed
  end

  # Drain what is queued, flush the range coder, then drain the flush bytes.
  encode_queued_symbols(encoder, sink)
  encoder.queue_flush
  encode_queued_symbols(encoder, sink)

  sink.string
end
|
|
260
|
+
|
|
261
|
+
# Encode queued symbols to output
|
|
262
|
+
def encode_queued_symbols(encoder, output)
  # Nothing queued: return nil without touching the output.
  return if encoder.none?

  capacity = 10000
  scratch = "\0" * capacity
  cursor = Omnizip::Algorithms::LZMA::IntRef.new(0)
  starting_size = output.size

  # The encoder fills the scratch buffer and advances the cursor.
  encoder.encode_symbols(scratch, cursor, capacity)

  produced = cursor.value
  output.write(StringCompat.byteslice(scratch, 0, produced)) if produced.positive?

  # Number of bytes the output grew by.
  output.size - starting_size
end
|
|
278
|
+
|
|
279
|
+
# Thin wrapper around String#byteslice.
#
# String#byteslice has been part of core Ruby since 1.9.3, so no fallback
# implementation is needed; the module is kept so existing call sites
# (StringCompat.byteslice) continue to work.
module StringCompat
  # Extract a run of bytes from a string.
  #
  # @param string [String] Source string
  # @param start [Integer] Byte offset of the first byte
  # @param length [Integer] Number of bytes to take
  # @return [String, nil] The slice, or nil when start is out of range
  def self.byteslice(string, start, length)
    string.byteslice(start, length)
  end
end
|
|
291
|
+
|
|
292
|
+
# Encode literal byte
|
|
293
|
+
def encode_literal(symbol, encoder, pos)
  pos_mask = (1 << @pb) - 1

  # is_match = 0 announces a literal.
  encoder.queue_bit(@models.is_match[@state.value][pos & pos_mask], 0)

  literal_offset = get_literal_state(pos, @prev_byte)

  # The coding mode is decided by the state as it was before the literal
  # transition is applied.
  matched_mode = @state.use_matched_literal?
  @state.update_literal!

  # In matched mode, look up the byte found rep0 + 1 positions back in the
  # match finder's buffer; it stays nil when that position is outside it.
  match_byte = nil
  if matched_mode
    source_index = @current_start_pos + pos - @state.reps[0] - 1
    window = @match_finder.buffer
    in_window = source_index >= 0 && source_index < window.bytesize
    match_byte = window.getbyte(source_index) if in_window
  end

  if match_byte.nil?
    encode_normal_literal(literal_offset, symbol, encoder)
  else
    encode_matched_literal(literal_offset, match_byte, symbol, encoder)
  end

  @prev_byte = symbol
end
|
|
320
|
+
|
|
321
|
+
# Encode normal match
|
|
322
|
+
def encode_match(distance, length, encoder, pos, match_pos, _input_data)
  pos_state = pos & ((1 << @pb) - 1)

  # is_match = 1 followed by is_rep = 0 announces a match whose distance
  # is coded explicitly.
  encoder.queue_bit(@models.is_match[@state.value][pos_state], 1)
  encoder.queue_bit(@models.is_rep[@state.value], 0)

  @state.update_match!(distance)

  encode_match_length(length, pos_state, encoder)
  encode_distance(distance, length, encoder)

  # The literal context for the next symbol is the last byte covered by
  # this match. Read it from the matched span itself (match_pos is the
  # position passed to find_matches) rather than from the source side via
  # the distance: the old expression `match_pos - distance + length - 1`
  # is one byte too far when distance is zero-based, which is how
  # encode_literal treats reps[0] (`- reps[0] - 1`).
  # Assumes buffer[match_pos + i] holds the i-th byte of this match.
  last_byte_pos = match_pos + length - 1
  window = @match_finder.buffer
  @prev_byte = window.getbyte(last_byte_pos) if last_byte_pos >= 0 && last_byte_pos < window.bytesize
end
|
|
339
|
+
|
|
340
|
+
# Encode repeated match
|
|
341
|
+
def encode_repeated_match(rep, length, encoder, pos, match_pos)
  pos_state = pos & ((1 << @pb) - 1)

  # is_match = 1 followed by is_rep = 1 announces a repeated-distance match.
  encoder.queue_bit(@models.is_match[@state.value][pos_state], 1)
  encoder.queue_bit(@models.is_rep[@state.value], 1)

  prob_is_rep0 = @models.is_rep0[@state.value]
  if rep.zero?
    # rep0: the history stays as it is; one more bit separates the
    # one-byte short rep from a longer rep0 match.
    encoder.queue_bit(prob_is_rep0, 0)
    encoder.queue_bit(@models.is_rep0_long[@state.value][pos_state], length == 1 ? 0 : 1)
  else
    reps = @state.reps

    # Capture the selected distance BEFORE the history is shifted; reading
    # it afterwards returns an already-moved neighbour (rep 1 would read
    # the old reps[0]) and the selected distance is lost.
    distance = reps[rep]
    raise "Distance is nil for rep #{rep}, reps=#{reps.inspect}" if distance.nil?

    encoder.queue_bit(prob_is_rep0, 1)

    prob_is_rep1 = @models.is_rep1[@state.value]
    if rep == 1
      encoder.queue_bit(prob_is_rep1, 0)
    else
      encoder.queue_bit(prob_is_rep1, 1)
      encoder.queue_bit(@models.is_rep2[@state.value], rep - 2)

      reps[3] = reps[2] if rep == 3
      reps[2] = reps[1]
    end

    # Move the selected distance to the front of the history.
    reps[1] = reps[0]
    reps[0] = distance
  end

  if length == 1
    @state.update_short_rep!
  else
    encode_match_length(length, pos_state, encoder)
    @state.update_long_rep!
  end

  # The literal context for the next symbol is the last byte covered by
  # this match, read from the matched span itself so the result does not
  # depend on the distance convention.
  # Assumes buffer[match_pos + i] holds the i-th byte of this match.
  last_byte_pos = match_pos + length - 1
  window = @match_finder.buffer
  @prev_byte = window.getbyte(last_byte_pos) if last_byte_pos >= 0 && last_byte_pos < window.bytesize
end
|
|
396
|
+
|
|
397
|
+
def get_literal_state(pos, prev_byte)
  # Keep the low @lp bits of the position and the high @lc bits of the
  # previous byte, then shift the result left by @lc.
  # NOTE(review): XZ Utils' literal_subcoder additionally multiplies this
  # offset by 3 (0x300 probabilities per literal coder) - confirm that the
  # layout of @models.literal accounts for that.
  keep_mask = (0x100 << @lp) - (0x100 >> @lc)
  combined = (pos << 8) + prev_byte
  (combined & keep_mask) << @lc
end
|
|
401
|
+
|
|
402
|
+
def encode_normal_literal(literal_offset, symbol, encoder)
  # Walk the 8 bits of the symbol from most to least significant; `node`
  # is the index inside the bit tree and starts at the root (1).
  node = 1
  8.downto(1) do |width|
    bit = symbol[width - 1]
    encoder.queue_bit(@models.literal[literal_offset + node], bit)
    node = (node << 1) | bit
  end
end
|
|
410
|
+
|
|
411
|
+
def encode_matched_literal(literal_offset, match_byte, symbol, encoder)
  # `mask` starts at 0x100 and is cleared at the first bit where the
  # match byte and the symbol disagree.
  mask = 0x100
  shifted_symbol = symbol + 0x100
  shifted_match = match_byte

  until shifted_symbol >= 0x10000
    shifted_match <<= 1
    match_bit = shifted_match & mask
    model_index = mask + match_bit + (shifted_symbol >> 8)
    bit = (shifted_symbol >> 7) & 1

    encoder.queue_bit(@models.literal[literal_offset + model_index], bit)

    shifted_symbol <<= 1
    mask &= ~(shifted_match ^ shifted_symbol)
  end
end
|
|
427
|
+
|
|
428
|
+
def encode_match_length(length, pos_state, encoder)
  coder = @models.match_len_encoder
  symbol = length - 2

  # Values 0..7: choice = 0, then a 3-bit tree selected by pos_state.
  if symbol < 8
    encoder.queue_bit(coder.choice, 0)
    return encode_bittree(coder.low[pos_state], 3, symbol, encoder)
  end

  encoder.queue_bit(coder.choice, 1)

  if symbol < 16
    # Values 8..15: choice2 = 0, then a 3-bit tree selected by pos_state.
    encoder.queue_bit(coder.choice2, 0)
    encode_bittree(coder.mid[pos_state], 3, symbol - 8, encoder)
  else
    # Values 16 and up: choice2 = 1, then the shared 8-bit tree.
    encoder.queue_bit(coder.choice2, 1)
    encode_bittree(coder.high, 8, symbol - 16, encoder)
  end
end
|
|
444
|
+
|
|
445
|
+
def encode_distance(distance, length, encoder)
  slot = get_dist_slot(distance)

  # The slot tree is chosen by the match length, capped at index 3.
  len_state = length - 2 < 3 ? length - 2 : 3
  encode_bittree(@models.dist_slot[len_state], 6, slot, encoder)

  # Slots 0..3 carry no extra bits.
  return if slot < 4

  footer_bits = (slot >> 1) - 1
  base = (2 | (slot & 1)) << footer_bits
  remainder = distance - base

  if slot < 14
    # The whole remainder goes through a reverse bit tree.
    encode_bittree_reverse(@models.dist_special, remainder, footer_bits, base - slot - 1, encoder)
  else
    # The upper bits are sent as direct bits; the lowest 4 bits go
    # through the alignment tree.
    encoder.queue_direct_bits(remainder >> 4, footer_bits - 4)
    encode_bittree_reverse(@models.dist_align, remainder & 0xF, 4, 0, encoder)
  end
end
|
|
466
|
+
|
|
467
|
+
def encode_bittree(probs, num_bits, value, encoder)
  # Most significant bit first; `node` is the tree index, rooted at 1.
  node = 1
  num_bits.downto(1) do |width|
    bit = value[width - 1]
    encoder.queue_bit(probs[node], bit)
    node = (node << 1) | bit
  end
end
|
|
475
|
+
|
|
476
|
+
def encode_bittree_reverse(probs, value, num_bits, offset, encoder)
  # Least significant bit first; the model index is offset + tree node.
  node = 1
  num_bits.times do |shift|
    bit = value[shift]
    encoder.queue_bit(probs[offset + node], bit)
    node = (node << 1) | bit
  end
end
|
|
484
|
+
|
|
485
|
+
def get_dist_slot(distance)
  # Distances 0..3 are their own slot.
  return distance if distance < 4

  # For larger values, shift until only the top two bits remain; every
  # shift accounts for two slots and the remaining bits (2 or 3) are added.
  shifts = distance.bit_length - 2
  (shifts * 2) + (distance >> shifts)
end
|
|
498
|
+
|
|
499
|
+
def encode_dict_size(dict_size)
  # Never describe a dictionary smaller than DICT_SIZE_MIN.
  d = [dict_size, DICT_SIZE_MIN].max

  # floor(log2(d)) and the matching power of two.
  exponent = d.bit_length - 1
  pow2 = 1 << exponent

  # Property values step through 2^n (even) and 2^n + 2^(n-1) (odd),
  # starting at 2^12 for value 0. Pick the smallest step that is not
  # below d, so the advertised size is never smaller than the real one:
  # exact powers of two keep the even value, sizes up to 1.5 * 2^n take
  # the odd value, and anything larger rounds up to the next power of two.
  prop = (exponent - 12) * 2
  prop += d > pow2 + (pow2 >> 1) ? 2 : 1 if d > pow2

  # 40 is the largest value this encoder emits; 0 is the smallest.
  prop.clamp(0, 40)
end
|
|
515
|
+
end
|
|
516
|
+
end
|
|
517
|
+
end
|
|
518
|
+
end
|
|
519
|
+
end
|