omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "xz_utils_decoder"
|
|
24
|
+
require "stringio"
|
|
25
|
+
|
|
26
|
+
module Omnizip
|
|
27
|
+
module Algorithms
|
|
28
|
+
class LZMA < Algorithm
|
|
29
|
+
#
|
|
30
|
+
# Decoder for .lzma (LZMA_Alone) format
|
|
31
|
+
#
|
|
32
|
+
# This is the legacy LZMA_Alone format used by LZMA Utils 4.32.x.
|
|
33
|
+
# It is DIFFERENT from the XZ format's LZMA2 compression!
|
|
34
|
+
#
|
|
35
|
+
# File format:
|
|
36
|
+
# - Properties (1 byte): encodes lc, lp, pb values
|
|
37
|
+
# - Dictionary size (4 bytes, little-endian)
|
|
38
|
+
# - Uncompressed size (8 bytes, little-endian, UINT64_MAX = unknown)
|
|
39
|
+
# - LZMA1 compressed stream (no footer, no CRC32)
|
|
40
|
+
#
|
|
41
|
+
# Reference: XZ Utils source tree, src/liblzma/common/alone_decoder.c
|
|
42
|
+
#
|
|
43
|
+
# This decoder uses the same LZMA1 decoding engine as XZ format,
|
|
44
|
+
# but with the legacy .lzma container format.
|
|
45
|
+
#
|
|
46
|
+
# @example Decode .lzma file
|
|
47
|
+
# data = File.binread("file.lzma")
|
|
48
|
+
# decoder = Omnizip::Algorithms::LZMA::LzmaAloneDecoder.new(StringIO.new(data))
|
|
49
|
+
# result = decoder.decode_stream
|
|
50
|
+
#
|
|
51
|
+
class LzmaAloneDecoder
|
|
52
|
+
# Maximum valid uncompressed size (256 GiB)
|
|
53
|
+
# From alone_decoder.c:118
|
|
54
|
+
MAX_UNCOMPRESSED_SIZE = (1 << 38) # 2**38 = 274_877_906_944 bytes
|
|
55
|
+
|
|
56
|
+
# Property byte validation limits
|
|
57
|
+
# From lzma_decoder.c:1218
|
|
58
|
+
MAX_PROPERTY_BYTE = (((4 * 5) + 4) * 9) + 8 # = 224 (24 * 9 + 8)
|
|
59
|
+
|
|
60
|
+
# Initialize the decoder with .lzma format input
|
|
61
|
+
#
|
|
62
|
+
# @param input [IO] Input stream of .lzma compressed data
|
|
63
|
+
# @param options [Hash] Decoding options
|
|
64
|
+
# @option options [Boolean] :picky If true, reject files unlikely to be .lzma (default: false)
|
|
65
|
+
# @raise [RuntimeError] If header is invalid or unsupported
|
|
66
|
+
def initialize(input, options = {})
  @input = input
  @picky = options.fetch(:picky) { false }

  # Read and validate the .lzma header before anything else. parse_header
  # pulls bytes straight from @input and stores the decoded lc/lp/pb values
  # (and, presumably, the dictionary and uncompressed sizes used below) in
  # instance variables.
  parse_header

  # No separate wrapper object is built: the LZMA decoder is handed the very
  # same IO, which parse_header has already read from.
  @lzma_stream = @input

  # validate_size: the .lzma header carries an explicit uncompressed size.
  # allow_eopm: an end-of-payload marker is accepted even when the size is
  # known (alone_decoder.c:127, LZMA_LZMA1EXT_ALLOW_EOPM).
  # NOTE(review): lzma2_mode is true although this is an LZMA1 container;
  # presumably it makes XzUtilsDecoder take its parameters from these
  # arguments instead of the stream. Confirm against XzUtilsDecoder.
  decoder_settings = {
    lzma2_mode: true,
    validate_size: true,
    allow_eopm: true,
    lc: @lc,
    lp: @lp,
    pb: @pb,
    dict_size: @dict_size,
    uncompressed_size: @uncompressed_size,
  }
  @decoder = XzUtilsDecoder.new(@lzma_stream, **decoder_settings)
end
|
|
91
|
+
|
|
92
|
+
# Decode the .lzma stream
|
|
93
|
+
#
|
|
94
|
+
# @param output [IO, nil] Optional output stream
|
|
95
|
+
# @return [String, Integer] Decompressed data or bytes written
|
|
96
|
+
def decode_stream(output = nil)
|
|
97
|
+
# .lzma format allows EOPM even when uncompressed size is known
|
|
98
|
+
# Reference: alone_decoder.c:127 (LZMA_LZMA1EXT_ALLOW_EOPM)
|
|
99
|
+
@decoder.decode_stream(output, check_rc_finished: false)
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
private
|
|
103
|
+
|
|
104
|
+
# Parse .lzma format header
|
|
105
|
+
#
|
|
106
|
+
# Format (from alone_decoder.c):
|
|
107
|
+
# - Properties (1 byte): lc/lp/pb encoded
|
|
108
|
+
# - Dictionary size (4 bytes, little-endian)
|
|
109
|
+
# - Uncompressed size (8 bytes, little-endian, UINT64_MAX = unknown)
|
|
110
|
+
#
|
|
111
|
+
# Reference: /Users/mulgogi/src/external/xz/src/liblzma/common/alone_decoder.c
|
|
112
|
+
#
|
|
113
|
+
# @return [void]
|
|
114
|
+
# @raise [RuntimeError] If header is invalid
|
|
115
|
+
def parse_header
|
|
116
|
+
# Step 1: Parse properties byte (SEQ_PROPERTIES)
|
|
117
|
+
# Reference: alone_decoder.c:64-68
|
|
118
|
+
props = @input.getbyte
|
|
119
|
+
raise "Invalid .lzma header: missing properties byte" if props.nil?
|
|
120
|
+
|
|
121
|
+
# Use XZ Utils property byte parsing
|
|
122
|
+
# Reference: /Users/mulgogi/src/external/xz/src/liblzma/lzma/lzma_decoder.c:1216-1228
|
|
123
|
+
if props > MAX_PROPERTY_BYTE
|
|
124
|
+
raise "Invalid .lzma header: properties byte #{props} exceeds maximum #{MAX_PROPERTY_BYTE}"
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
# Parse lc, lp, pb from properties byte
|
|
128
|
+
# Formula: pb = props / (9 * 5); lp = (props % 45) / 9; lc = (props % 45) % 9
|
|
129
|
+
@pb = props / (9 * 5)
|
|
130
|
+
remainder = props - (@pb * 9 * 5)
|
|
131
|
+
@lp = remainder / 9
|
|
132
|
+
@lc = remainder - (@lp * 9)
|
|
133
|
+
|
|
134
|
+
# Validate lc + lp <= 4 (LZMA_LCLP_MAX)
|
|
135
|
+
# Reference: lzma_decoder.c:1227
|
|
136
|
+
if @lc + @lp > 4
|
|
137
|
+
raise "Invalid .lzma header: lc (#{@lc}) + lp (#{@lp}) exceeds maximum 4"
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
# Step 2: Parse dictionary size (SEQ_DICTIONARY_SIZE)
|
|
141
|
+
# Reference: alone_decoder.c:71-96
|
|
142
|
+
@dict_size = 0
|
|
143
|
+
4.times do |i|
|
|
144
|
+
byte = @input.getbyte
|
|
145
|
+
raise "Incomplete .lzma header: missing dictionary size byte" if byte.nil?
|
|
146
|
+
|
|
147
|
+
@dict_size |= (byte << (i * 8))
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Picky mode validation: only accept dictionary sizes that are
|
|
151
|
+
# 2^n or 2^n + 2^(n-1). This reduces false positives.
|
|
152
|
+
# Reference: alone_decoder.c:76-93
|
|
153
|
+
if @picky && @dict_size != 0xFFFFFFFF
|
|
154
|
+
# Check if dict_size is valid: 2^n or 2^n + 2^(n-1)
|
|
155
|
+
d = @dict_size - 1
|
|
156
|
+
d |= d >> 2
|
|
157
|
+
d |= d >> 3
|
|
158
|
+
d |= d >> 4
|
|
159
|
+
d |= d >> 8
|
|
160
|
+
d |= d >> 16
|
|
161
|
+
d += 1
|
|
162
|
+
|
|
163
|
+
if d != @dict_size
|
|
164
|
+
raise "Invalid .lzma header: dictionary size #{@dict_size} is not 2^n or 2^n + 2^(n-1)"
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# Step 3: Parse uncompressed size (SEQ_UNCOMPRESSED_SIZE)
|
|
169
|
+
# Reference: alone_decoder.c:102-120
|
|
170
|
+
@uncompressed_size = 0
|
|
171
|
+
8.times do |i|
|
|
172
|
+
byte = @input.getbyte
|
|
173
|
+
raise "Incomplete .lzma header: missing uncompressed size byte" if byte.nil?
|
|
174
|
+
|
|
175
|
+
@uncompressed_size |= (byte << (i * 8))
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
# Picky mode validation: if uncompressed size is known (not UINT64_MAX),
|
|
179
|
+
# it must be less than 256 GiB
|
|
180
|
+
# Reference: alone_decoder.c:116-120
|
|
181
|
+
if @picky && @uncompressed_size != 0xFFFFFFFFFFFFFFFF &&
|
|
182
|
+
@uncompressed_size >= MAX_UNCOMPRESSED_SIZE
|
|
183
|
+
raise "Invalid .lzma header: uncompressed size #{@uncompressed_size} exceeds maximum #{MAX_UNCOMPRESSED_SIZE}"
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
# Note: XZ Utils uses UINT64_MAX (0xFFFFFFFFFFFFFFFF) for unknown size
|
|
187
|
+
# Our decoder treats this as "allow end-of-payload marker"
|
|
188
|
+
end
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Algorithms
|
|
5
|
+
class LZMA < Algorithm
|
|
6
|
+
# LZMA State Machine
|
|
7
|
+
# Ported from XZ Utils lzma_common.h and lzma_decoder.c
|
|
8
|
+
class LZMAState
  # Builds a table that sends every literal state (0-6) to one target and
  # every non-literal state (7-11) to another.
  two_way_table = lambda do |from_literal, from_non_literal|
    (0..11).each_with_object({}) do |state, table|
      table[state] = state < 7 ? from_literal : from_non_literal
    end.freeze
  end

  # Builds the "after a literal" table (XZ Utils update_literal macro):
  # 0-3 -> 0, 4-9 -> state - 3, 10-11 -> state - 6.
  literal_table = lambda do
    {
      0 => 0, 1 => 0, 2 => 0, 3 => 0,
      4 => 1, 5 => 2, 6 => 3, 7 => 4, 8 => 5, 9 => 6,
      10 => 4, 11 => 5
    }.freeze
  end

  # State transition tables keyed by event, then by current state
  # (ported from XZ Utils lzma_common.h / lzma_decoder.c).
  TRANSITIONS = {
    # Literal following a literal state
    update_literal: literal_table.call,
    # Literal following a match/rep state (same mapping as above)
    update_literal_matched: literal_table.call,
    # Regular match
    update_match: two_way_table.call(7, 10),
    # Repeat match
    update_rep: two_way_table.call(8, 11),
    # Short repeat (length = 1)
    update_short_rep: two_way_table.call(9, 11),
    # Long repeat (length > 1):
    # state < LIT_STATES ? STATE_LIT_LONGREP (8) : STATE_NONLIT_REP (11)
    update_long_rep: two_way_table.call(8, 11),
  }.freeze

  attr_reader :value, :reps

  # @param value [Integer] Initial state number (0-11)
  def initialize(value = 0)
    @value = value
    # The four most recent match distances, newest first
    @reps = [0, 0, 0, 0]
  end

  # Advance the state after a literal; the table is chosen by whether the
  # current state is a non-literal state.
  def update_literal!
    table = use_matched_literal? ? :update_literal_matched : :update_literal
    @value = TRANSITIONS[table][@value]
  end

  # Advance the state after a regular match and record its distance as rep0.
  #
  # @param distance [Integer] Distance of the match just coded
  def update_match!(distance)
    @value = TRANSITIONS[:update_match][@value]
    rotate_reps!(distance)
  end

  # Advance the state after a repeat match and move the used distance to
  # the front of the history.
  #
  # @param rep_index [Integer] Which history slot (0-3) was reused
  def update_rep!(rep_index)
    @value = TRANSITIONS[:update_rep][@value]
    rotate_reps_for_rep!(rep_index)
  end

  # Advance the state after a short rep (length = 1).
  def update_short_rep!
    @value = TRANSITIONS[:update_short_rep][@value]
  end

  # Advance the state after a long rep (length > 1), per the XZ Utils
  # update_long_rep macro.
  def update_long_rep!
    @value = TRANSITIONS[:update_long_rep][@value]
  end

  # Whether the next literal uses matched-literal coding.
  # XZ Utils: is_literal_state(state) = (state < LIT_STATES), LIT_STATES = 7,
  # so states 7-11 are the non-literal ones.
  #
  # @return [Boolean]
  def use_matched_literal?
    @value >= 7
  end

  # Push a new distance onto the front of the history, dropping the oldest.
  #
  # @param distance [Integer] New rep0
  def rotate_reps!(distance)
    @reps[3], @reps[2], @reps[1] = @reps[2], @reps[1], @reps[0]
    @reps[0] = distance
  end

  private

  # Move history slot +rep_index+ to the front, shifting the slots before
  # it back by one. Index 0 (and any index outside 1-3) leaves the history
  # untouched.
  def rotate_reps_for_rep!(rep_index)
    case rep_index
    when 0
      # rep0 is already in front
    when 1
      @reps[0], @reps[1] = @reps[1], @reps[0]
    when 2
      @reps[0], @reps[1], @reps[2] = @reps[2], @reps[0], @reps[1]
    when 3
      @reps[0], @reps[1], @reps[2], @reps[3] =
        @reps[3], @reps[0], @reps[1], @reps[2]
    end
  end
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Algorithms
|
|
5
|
+
class LZMA < Algorithm
|
|
6
|
+
# Match candidate result from LZ77 match finding
|
|
7
|
+
class Match
  attr_reader :distance, :length

  # @param distance [Integer] Backward distance to the matched data
  # @param length [Integer] Number of matching bytes
  def initialize(distance, length)
    @distance = distance
    @length = length
  end

  # Whether the match can be used with a dictionary of the given size:
  # the distance must not exceed the dictionary and the length must be
  # at least 2.
  #
  # @param dict_size [Integer] Dictionary size in bytes
  # @return [Boolean] true if match is valid
  def valid?(dict_size)
    return false unless @distance <= dict_size

    @length >= 2
  end

  # Human-readable form for debugging.
  #
  # @return [String] Match description
  def to_s
    "Match(dist=#{@distance}, len=#{@length})"
  end
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "match"
|
|
24
|
+
|
|
25
|
+
module Omnizip
|
|
26
|
+
module Algorithms
|
|
27
|
+
class LZMA < Algorithm
|
|
28
|
+
# Match Finder using hash chain algorithm for LZ77 compression
|
|
29
|
+
# Ported from XZ Utils lz_encoder.c
|
|
30
|
+
class MatchFinder
  # Number of buckets in the hash table
  HASH_SIZE = 4096
  # Upper bound on the number of candidates collected per position
  MAX_MATCHES = 274

  attr_reader :dictionary, :buffer, :position

  # @param dictionary [#size] Object whose #size is the maximum match distance
  def initialize(dictionary)
    @dictionary = dictionary
    @buffer = String.new(encoding: Encoding::BINARY)
    @position = 0
    # nil (not 0) marks an empty bucket so that position 0 stays representable
    @hash_table = Array.new(HASH_SIZE, nil)
    # @hash_chain[pos] is the previous position that fell in the same bucket
    @hash_chain = []
    @matches = Array.new(MAX_MATCHES)
    @matches_count = 0
  end

  # Append input data to the search buffer.
  #
  # @param data [String] Binary data to add
  # @return [void]
  def feed(data)
    @buffer << data
  end

  # Reset the match finder for a new encoding session: clears the buffer,
  # the hash table and the hash chain.
  def reset
    @buffer.clear
    @position = 0
    @hash_table = Array.new(HASH_SIZE, nil)
    @hash_chain.clear
    @matches_count = 0
  end

  # Insert every position from 0 up to end_pos into the hash table without
  # searching for matches (the original author describes this as matching
  # the XZ Utils "skip" behavior).
  #
  # @param end_pos [Integer] Last position to initialize (inclusive)
  # @return [void]
  def skip(end_pos)
    pos = 0
    while pos + 3 <= @buffer.bytesize && pos <= end_pos
      hash = calc_hash(@buffer, pos)
      if hash
        @hash_chain[pos] = @hash_table[hash]
        @hash_table[hash] = pos
      end
      pos += 1
    end
  end

  # Find matches for a position in the buffer.
  #
  # @param current_pos [Integer] Position to search at; when omitted it is
  #   the buffer size minus 273
  # @return [Array<Match>] Array of matches sorted by length (descending)
  def find_matches(current_pos = @buffer.bytesize - 273)
    hash = nil
    if current_pos >= 0 && current_pos + 3 <= @buffer.bytesize
      hash = calc_hash(@buffer, current_pos)
    end

    # Register this position in its bucket even when no match will be
    # reported for it, so later positions can refer back to it. The insert
    # is skipped when the position is already the bucket head, which avoids
    # making a position its own predecessor on a repeated call.
    # NOTE(review): a position inserted earlier that is no longer the bucket
    # head is re-inserted here and ends up chained to a later position;
    # the negative-distance check below stops the walk in that case.
    if hash && @hash_table[hash] != current_pos
      @hash_chain[current_pos] = @hash_table[hash]
      @hash_table[hash] = current_pos
    end

    # No matches are produced before position 2 or without three bytes to
    # hash. Per the original author's note, the decoder needs at least two
    # decoded bytes before a distance-1 match is valid.
    return [] if hash.nil? || @buffer.bytesize < 4 || current_pos + 3 > @buffer.bytesize || current_pos < 2

    @matches_count = 0
    chain_pos = @hash_chain[current_pos]

    while chain_pos && @matches_count < MAX_MATCHES
      # An out-of-range chain entry ends the walk. This must be `break`:
      # `next` in a `while` loop re-tests the condition without advancing
      # chain_pos and would spin forever.
      break if chain_pos >= @buffer.bytesize || chain_pos.negative?

      distance = current_pos - chain_pos
      # Stop on a non-positive distance (the chain points at or past the
      # current position) or one that exceeds the dictionary.
      break if distance.negative? || distance > @dictionary.size || distance.zero?

      length = verify_match(current_pos, chain_pos)

      if length >= 2
        @matches[@matches_count] = Match.new(distance, length)
        @matches_count += 1
      end

      # Follow the chain; a position beyond the chain array ends the walk
      chain_pos = (@hash_chain[chain_pos] if chain_pos < @hash_chain.size)
    end

    @matches.first(@matches_count).sort_by { |m| -m.length }
  end

  # Get the longest match at the default search position.
  #
  # @return [Match, nil] Longest match found or nil
  def longest_match
    find_matches.first
  end

  # Legacy API: find the longest match at a position of an external byte
  # array. The internal buffer is extended from +bytes+ when +pos+ lies
  # beyond it.
  #
  # @param bytes [Array<Integer>] Byte array
  # @param pos [Integer] Position to find match at
  # @return [Match, nil] Longest match found or nil
  def find_longest_match(bytes, pos)
    if pos >= @buffer.bytesize
      # Append everything the buffer does not hold yet, starting at the
      # buffer's current length rather than at +pos+, so that buffer offsets
      # keep lining up with indexes into +bytes+.
      missing = bytes[@buffer.bytesize..] || []
      @buffer << missing.pack("C*")
    end

    find_matches(pos).first
  end

  private

  # Compute the hash bucket for the three bytes starting at +pos+.
  #
  # The three bytes are combined little-endian and reduced modulo HASH_SIZE.
  # With HASH_SIZE = 4096 only the low 12 bits survive, i.e. all of the
  # first byte and the low nibble of the second; the third byte does not
  # affect the bucket.
  #
  # @param data [String] Buffer data
  # @param pos [Integer] Position to hash
  # @return [Integer] Hash value (0 when fewer than three bytes remain)
  def calc_hash(data, pos)
    return 0 if pos + 3 > data.bytesize

    (data.getbyte(pos) |
      (data.getbyte(pos + 1) << 8) |
      (data.getbyte(pos + 2) << 16)) % HASH_SIZE
  end

  # Count how many bytes agree starting at two buffer positions, capped at
  # 273 and at the end of the buffer.
  #
  # @param pos1 [Integer] First position
  # @param pos2 [Integer] Second position
  # @return [Integer] Match length
  def verify_match(pos1, pos2)
    max_len = [273, @buffer.bytesize - pos1, @buffer.bytesize - pos2].min
    length = 0

    while length < max_len &&
          @buffer.getbyte(pos1 + length) == @buffer.getbyte(pos2 + length)
      length += 1
    end

    length
  end
end
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
end
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Algorithms
|
|
25
|
+
class LZMA < Algorithm
|
|
26
|
+
# Configuration model for Match Finder behavior
|
|
27
|
+
#
|
|
28
|
+
# This model separates configuration from implementation, allowing
|
|
29
|
+
# different match finding strategies (SDK-compatible vs simplified)
|
|
30
|
+
# to be configured declaratively.
|
|
31
|
+
#
|
|
32
|
+
# @example SDK-compatible configuration
|
|
33
|
+
# config = MatchFinderConfig.new(
|
|
34
|
+
# mode: :sdk,
|
|
35
|
+
# hash_size: 65536,
|
|
36
|
+
# chain_length: 32,
|
|
37
|
+
# lazy_matching: false
|
|
38
|
+
# )
|
|
39
|
+
#
|
|
40
|
+
# @example Simplified configuration
|
|
41
|
+
# config = MatchFinderConfig.new(
|
|
42
|
+
# mode: :simplified,
|
|
43
|
+
# hash_size: 65536,
|
|
44
|
+
# chain_length: 1024
|
|
45
|
+
# )
|
|
46
|
+
class MatchFinderConfig
  # Accepted values for #mode and #search_mode (compared by their string form)
  MODES = %w[sdk simplified].freeze
  SEARCH_MODES = %w[hash_chain binary_tree].freeze

  attr_accessor :mode, :hash_size, :chain_length, :search_mode,
                :lazy_matching, :max_match_length, :window_size

  # @param mode [String, Symbol] "sdk" or "simplified"
  # @param hash_size [Integer] Number of hash buckets
  # @param chain_length [Integer] Maximum hash-chain length to follow
  # @param search_mode [String, Symbol] "hash_chain" or "binary_tree"
  # @param lazy_matching [Boolean] Whether lazy matching is enabled
  # @param max_match_length [Integer] Longest match length to report
  # @param window_size [Integer] Sliding-window (dictionary) size
  def initialize(mode: "simplified", hash_size: 65_536,
                 chain_length: 1024, search_mode: "hash_chain",
                 lazy_matching: false, max_match_length: 273,
                 window_size: 65_536)
    @mode = mode
    @hash_size = hash_size
    @chain_length = chain_length
    @search_mode = search_mode
    @lazy_matching = lazy_matching
    @max_match_length = max_match_length
    @window_size = window_size
  end

  # Validate configuration.
  #
  # mode and search_mode are compared by their string form, so both the
  # string defaults ("sdk") and the symbols shown in the class examples and
  # error messages (:sdk) are accepted. The stored values are not modified.
  #
  # @return [Boolean] true if valid
  # @raise [ArgumentError] if configuration is invalid
  def validate!
    unless MODES.include?(mode.to_s)
      raise ArgumentError, "mode must be :sdk or :simplified"
    end

    unless SEARCH_MODES.include?(search_mode.to_s)
      raise ArgumentError,
            "search_mode must be :hash_chain or :binary_tree"
    end

    raise ArgumentError, "hash_size must be positive" if hash_size <= 0
    raise ArgumentError, "chain_length must be positive" if chain_length <= 0
    raise ArgumentError, "max_match_length must be >= 2" if max_match_length < 2
    raise ArgumentError, "window_size must be positive" if window_size <= 0

    true
  end

  # Create SDK-compatible configuration.
  #
  # @param dict_size [Integer] Dictionary size
  # @param level [Integer] Compression level (0-9)
  # @return [MatchFinderConfig] SDK-compatible configuration
  def self.sdk_config(dict_size: 65536, level: 5)
    # Dictionaries of 1 MiB and above get a 2^20-entry hash, smaller ones 2^16
    hash_size = dict_size >= (1 << 20) ? (1 << 20) : (1 << 16)

    # Level 0-4: 32, Level 5-6: 64, Level 7-8: 128, anything else: 273.
    # NOTE(review): the original comment calls these values the SDK
    # "nice_len", yet they are stored as chain_length; confirm which
    # parameter is meant.
    chain_length = case level
                   when 0..4 then 32
                   when 5..6 then 64
                   when 7..8 then 128
                   else 273
                   end

    new(
      mode: "sdk",
      hash_size: hash_size,
      chain_length: chain_length,
      search_mode: "hash_chain",
      lazy_matching: level >= 7, # Enable lazy matching for high compression
      max_match_length: 273,
      window_size: dict_size,
    )
  end

  # Create simplified configuration (backward compatible).
  #
  # @param dict_size [Integer] Dictionary size
  # @return [MatchFinderConfig] Simplified configuration
  def self.simplified_config(dict_size: 65536)
    new(
      mode: "simplified",
      hash_size: 65536,
      chain_length: 1024,
      search_mode: "hash_chain",
      lazy_matching: false,
      max_match_length: 273,
      window_size: dict_size,
    )
  end
end
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
end
|