omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../../../algorithms/lzma/constants"
|
|
24
|
+
require_relative "match_finder"
|
|
25
|
+
require_relative "../../../algorithms/lzma/match_finder_config"
|
|
26
|
+
require_relative "../../../algorithms/lzma/match_finder_factory"
|
|
27
|
+
require_relative "../../../algorithms/lzma/literal_encoder"
|
|
28
|
+
require_relative "state_machine"
|
|
29
|
+
require_relative "../../../algorithms/lzma/length_coder"
|
|
30
|
+
require_relative "../../../algorithms/lzma/distance_coder"
|
|
31
|
+
require_relative "range_encoder" # Use 7-Zip SDK range encoder (not XZ Utils)
|
|
32
|
+
require_relative "../../../algorithms/lzma/bit_model"
|
|
33
|
+
|
|
34
|
+
module Omnizip
|
|
35
|
+
module Implementations
|
|
36
|
+
module SevenZip
|
|
37
|
+
module LZMA
|
|
38
|
+
# 7-Zip LZMA SDK encoder implementation.
|
|
39
|
+
#
|
|
40
|
+
# This is the original SdkEncoder moved from algorithms/lzma/sdk_encoder.rb
|
|
41
|
+
# to the new namespace structure.
|
|
42
|
+
#
|
|
43
|
+
# Ported from 7-Zip LZMA SDK by Igor Pavlov.
|
|
44
|
+
class Encoder
|
|
45
|
+
include Omnizip::Algorithms::LZMA::Constants
|
|
46
|
+
|
|
47
|
+
attr_reader :lc, :lp, :pb, :dict_size
|
|
48
|
+
|
|
49
|
+
# Initialize the SDK-compatible encoder
|
|
50
|
+
#
|
|
51
|
+
# @param output [IO] Output stream for compressed data
|
|
52
|
+
# @param options [Hash] Encoding options
|
|
53
|
+
# @option options [Integer] :lc Literal context bits (0-8, default: 3)
|
|
54
|
+
# @option options [Integer] :lp Literal position bits (0-4, default: 0)
|
|
55
|
+
# @option options [Integer] :pb Position bits (0-4, default: 2)
|
|
56
|
+
# @option options [Integer] :dict_size Dictionary size (default: 64KB)
|
|
57
|
+
# @option options [Integer] :level Compression level (0-9, default: 5)
|
|
58
|
+
# @option options [Boolean] :raw_mode Skip header and EOS marker for LZMA2 (default: false)
|
|
59
|
+
def initialize(output, options = {})
  # Destination stream; it is handed to the range encoder when encoding starts.
  @output = output

  # Table of tunables and their fallbacks. Each one is read from +options+
  # and stored in the instance variable of the same name; a key the caller
  # omits takes the fallback listed here.
  {
    lc: 3,
    lp: 0,
    pb: 2,
    dict_size: 1 << 16, # 64KB default
    level: 5,
    raw_mode: false, # when true, the header and EOS marker are skipped (LZMA2)
  }.each do |name, fallback|
    instance_variable_set(:"@#{name}", options.fetch(name, fallback))
  end

  # Check the settings before running model and coder initialization.
  validate_parameters
  init_models
  init_coders
end
|
|
72
|
+
|
|
73
|
+
# Encode a stream of data
|
|
74
|
+
#
|
|
75
|
+
# Main encoding loop following SDK's LzmaEnc_CodeOneBlock logic:
|
|
76
|
+
# 1. Initialize match finder with data
|
|
77
|
+
# 2. Process each position: find matches, encode literals/matches
|
|
78
|
+
# 3. Write EOS marker (skipped in raw mode)
|
|
79
|
+
# 4. Flush range encoder
|
|
80
|
+
#
|
|
81
|
+
# @param data [String, IO] Input data to compress
|
|
82
|
+
# @return [Array<String, Integer>] Tuple of [compressed_data, decode_bytes]
|
|
83
|
+
def encode_stream(data)
  # Accept a String or any object that responds to #read, then work on a
  # private binary copy so frozen or differently-encoded input is untouched.
  source = data.is_a?(String) ? data : data.read
  source = source.dup.force_encoding(Encoding::BINARY)

  # The header is only written for standalone streams, not in raw mode.
  write_header(source.bytesize) unless @raw_mode

  @range_encoder = RangeEncoder.new(@output)

  finder_config = Omnizip::Algorithms::LZMA::MatchFinderConfig.sdk_config(
    dict_size: @dict_size,
    level: @level,
  )
  @match_finder = Omnizip::Algorithms::LZMA::MatchFinderFactory.create(finder_config)

  # Fresh per-stream state: state machine, mirror of the bytes encoded so
  # far, current input offset, and the four repeat distances (all 1).
  @state = StateMachine.new
  @dict = +""
  @pos = 0
  @reps = [1, 1, 1, 1]

  # encode_match / encode_literal advance @pos themselves.
  until @pos >= source.bytesize
    candidate = @match_finder.find_longest_match(source, @pos)

    if should_encode_match?(candidate)
      encode_match(candidate, source)
    else
      encode_literal(source[@pos].ord, source)
    end
  end

  # Raw mode skips the EOS marker, but the range encoder is flushed in
  # every mode so its pending bytes reach the output.
  encode_eos_marker unless @raw_mode
  @range_encoder.flush

  # Raw mode reports the range encoder's decode byte count next to the data.
  return [@output.string, @range_encoder.bytes_for_decode] if @raw_mode

  if @output.respond_to?(:string)
    # In-memory output: hand back the buffer and its size.
    [@output.string, @output.string.bytesize]
  else
    # Output cannot be read back: both slots carry the encoder's byte count.
    [@range_encoder.bytes_for_decode, @range_encoder.bytes_for_decode]
  end
end
|
|
143
|
+
|
|
144
|
+
private
|
|
145
|
+
|
|
146
|
+
# Validate encoding parameters
|
|
147
|
+
#
|
|
148
|
+
# @return [void]
|
|
149
|
+
# @raise [ArgumentError] If parameters are invalid
|
|
150
|
+
def validate_parameters
  # Each row: value under test, inclusive lower bound, inclusive upper bound,
  # and the ArgumentError message raised when the value is rejected.
  # Rows are checked in order, so the first offending option is reported.
  checks = [
    [@lc, 0, 8, "lc must be 0-8"],
    [@lp, 0, 4, "lp must be 0-4"],
    [@pb, 0, 4, "pb must be 0-4"],
    [@level, 0, 9, "level must be 0-9"],
    [@dict_size, DICT_SIZE_MIN, DICT_SIZE_MAX, "Invalid dictionary size"],
  ]

  checks.each do |value, min, max, message|
    # The Integer test runs first so nil or a String is reported with the
    # parameter's own ArgumentError instead of escaping as NoMethodError or
    # as an unrelated "comparison failed" error from between?.
    next if value.is_a?(Integer) && value.between?(min, max)

    raise ArgumentError, message
  end

  nil
end
|
|
159
|
+
|
|
160
|
+
# Initialize probability models
|
|
161
|
+
#
|
|
162
|
+
# SDK allocates models following exact structure from LzmaEnc.c:
|
|
163
|
+
# - Literal models: compact layout indexed by literal_subcoder macro
|
|
164
|
+
# - Match models: NUM_STATES * (1 << @pb) models
|
|
165
|
+
# - Rep models: NUM_STATES models each
|
|
166
|
+
#
|
|
167
|
+
# The literal_subcoder macro calculates:
|
|
168
|
+
# base_offset = 3 * (((((pos) << 8) + (prev_byte)) & (literal_mask)) << (lc))
|
|
169
|
+
# We need to allocate enough models for the maximum possible offset.
|
|
170
|
+
#
|
|
171
|
+
# @return [void]
|
|
172
|
+
def init_models
  bit_model = Omnizip::Algorithms::LZMA::BitModel
  # Builds an array of +count+ independent BitModel instances.
  allocate = ->(count) { Array.new(count) { bit_model.new } }

  # literal_mask = (0x100 << lp) - (0x100 >> lc); the largest masked
  # context is the mask itself.
  literal_mask = (0x100 << @lp) - (0x100 >> @lc)

  # Largest literal base offset is 3 * (context << lc). A further 0x300 of
  # headroom covers the per-symbol indices used on top of that base, and
  # the +1 turns the highest index into an element count.
  highest_base_offset = 3 * (literal_mask << @lc)
  @literal_models = allocate.call(highest_base_offset + 0x300 + 1)

  # Tables indexed by (state, pos_state) need NUM_STATES * 2**pb entries;
  # tables indexed by state alone need NUM_STATES.
  state_by_pos = NUM_STATES * (1 << @pb)

  @is_match_models = allocate.call(state_by_pos)
  @is_rep_models = allocate.call(NUM_STATES)
  @is_rep0_models = allocate.call(NUM_STATES)
  @is_rep1_models = allocate.call(NUM_STATES)
  @is_rep2_models = allocate.call(NUM_STATES)
  @is_rep0_long_models = allocate.call(state_by_pos)
end
|
|
211
|
+
|
|
212
|
+
# Initialize SDK coders
|
|
213
|
+
#
|
|
214
|
+
# @return [void]
|
|
215
|
+
def init_coders
  lzma = Omnizip::Algorithms::LZMA
  # Both length coders are built with the number of position states (2**pb).
  pos_state_count = 1 << @pb

  @literal_encoder = lzma::LiteralEncoder.new(@lc)
  # Separate coder instances for regular-match and rep-match lengths.
  @length_coder = lzma::LengthCoder.new(pos_state_count)
  @rep_length_coder = lzma::LengthCoder.new(pos_state_count)
  @distance_coder = lzma::DistanceCoder.new(NUM_LEN_TO_POS_STATES)
end
|
|
221
|
+
|
|
222
|
+
# Determine if a match should be encoded
|
|
223
|
+
#
|
|
224
|
+
# SDK uses complex heuristics considering:
|
|
225
|
+
# - Match length vs literal cost
|
|
226
|
+
# - Position in stream
|
|
227
|
+
# - Previous encoding results
|
|
228
|
+
#
|
|
229
|
+
# Simplified heuristic: encode if length >= 2 and provides benefit
|
|
230
|
+
#
|
|
231
|
+
# @param match [MatchFinder::Match, nil] Found match
|
|
232
|
+
# @return [Boolean] True if match should be encoded
|
|
233
|
+
def should_encode_match?(match)
  # No candidate at all, or one shorter than the minimum encodable length.
  return false if match.nil? || match.length < MATCH_LEN_MIN

  # A match may only point at bytes that have already been encoded, so its
  # distance must not exceed the current position.
  return false if match.distance > @pos

  # Length-2 matches are only taken at short distances (< 128);
  # every other accepted length is always encoded as a match.
  match.length != 2 || match.distance < 128
end
|
|
253
|
+
|
|
254
|
+
# Encode a literal byte
|
|
255
|
+
#
|
|
256
|
+
# SDK encoding sequence (from LzmaEnc.c):
|
|
257
|
+
# 1. Encode is_match bit (0 = literal)
|
|
258
|
+
# 2. Calculate literal state
|
|
259
|
+
# 3. Encode literal (matched or unmatched based on state)
|
|
260
|
+
# 4. Update state machine
|
|
261
|
+
# 5. Update dictionary and position
|
|
262
|
+
#
|
|
263
|
+
# @param byte [Integer] Byte to encode (0-255)
|
|
264
|
+
# @param data [String] Full input data (for context)
|
|
265
|
+
# @return [void]
|
|
266
|
+
def encode_literal(byte, _data)
  pos_state_count = 1 << @pb
  pos_state = @pos & (pos_state_count - 1)

  # is_match bit = 0 marks a literal; the model is selected by
  # (state, pos_state) in the flat NUM_STATES * 2**pb table.
  is_match_model = @is_match_models[(@state.value * pos_state_count) + pos_state]
  @range_encoder.encode_bit(is_match_model, 0)

  # Previous byte is the last one appended to the dictionary (0 at start).
  prev_byte = @dict.empty? ? 0 : @dict[-1].ord

  # literal_mask = (0x100 << lp) - (0x100 >> lc)
  literal_mask = (0x100 << @lp) - (0x100 >> @lc)

  # Arguments shared by both literal encoder entry points.
  context_args = [@pos, prev_byte, @lc, literal_mask, @range_encoder, @literal_models]

  if @state.use_matched_literal? && @pos > @reps[0]
    # Matched literal: the reference byte sits reps[0] positions back,
    # i.e. at dict[pos - reps[0]].
    match_byte = @dict[@pos - @reps[0]].ord
    @literal_encoder.encode_matched(byte, match_byte, *context_args)
  else
    # Unmatched literal.
    @literal_encoder.encode_unmatched(byte, *context_args)
  end

  # Record the literal in the state machine, dictionary and position.
  @state.update_literal
  @dict << byte.chr
  @pos += 1
end
|
|
305
|
+
|
|
306
|
+
# Encode a match
|
|
307
|
+
#
|
|
308
|
+
# SDK encoding sequence:
|
|
309
|
+
# 1. Encode is_match bit (1 = match)
|
|
310
|
+
# 2. Encode is_rep bit (0 = regular match)
|
|
311
|
+
# 3. Encode match length using length coder
|
|
312
|
+
# 4. Encode match distance using distance coder
|
|
313
|
+
# 5. Update state machine
|
|
314
|
+
# 6. Update dictionary and position
|
|
315
|
+
#
|
|
316
|
+
# @param match [MatchFinder::Match] Match to encode
|
|
317
|
+
# @param data [String] Full input data (for updating dictionary)
|
|
318
|
+
# @return [void]
|
|
319
|
+
def encode_match(match, data)
  # Defensive check: the smallest distance this method accepts is 1.
  raise "Invalid match distance: #{match.distance}" if match.distance < 1

  pos_state_count = 1 << @pb
  pos_state = @pos & (pos_state_count - 1)

  # is_match bit = 1 (match), model chosen by (state, pos_state).
  @range_encoder.encode_bit(@is_match_models[(@state.value * pos_state_count) + pos_state], 1)

  # is_rep bit = 0: only regular (non-repeat) matches are emitted here.
  @range_encoder.encode_bit(@is_rep_models[@state.value], 0)

  # The length is coded relative to MATCH_LEN_MIN. The distance coder's
  # length state is that same value capped at NUM_LEN_TO_POS_STATES - 1.
  length_symbol = match.length - MATCH_LEN_MIN
  len_state = [length_symbol, NUM_LEN_TO_POS_STATES - 1].min

  @length_coder.encode(@range_encoder, length_symbol, pos_state)

  # The distance coder is given distance - 1.
  @distance_coder.encode(@range_encoder, match.distance - 1, len_state)

  # Shift the repeat-distance history: drop the oldest entry and make this
  # match's distance the new rep0.
  @reps.pop
  @reps.unshift(match.distance)

  @state.update_match

  # Mirror the matched bytes into the dictionary and advance past them.
  @dict << data[@pos, match.length]
  @pos += match.length
end
|
|
370
|
+
|
|
371
|
+
# Encode end-of-stream marker
|
|
372
|
+
#
|
|
373
|
+
# SDK EOS marker (from LzmaEnc.c):
|
|
374
|
+
# - Encoded as a match with maximum distance
|
|
375
|
+
# - Signals decoder to stop
|
|
376
|
+
#
|
|
377
|
+
# @return [void]
|
|
378
|
+
def encode_eos_marker
  # The marker uses the real position state of the current offset.
  pos_state_count = 1 << @pb
  pos_state = @pos & (pos_state_count - 1)

  # is_match = 1 followed by is_rep = 0: a regular match.
  @range_encoder.encode_bit(@is_match_models[(@state.value * pos_state_count) + pos_state], 1)
  @range_encoder.encode_bit(@is_rep_models[@state.value], 0)

  # Length symbol 0 (the minimum match length), then the reserved
  # end-of-stream distance 0xFFFFFFFF under length state 0.
  @length_coder.encode(@range_encoder, 0, pos_state)
  @distance_coder.encode(@range_encoder, 0xFFFFFFFF, 0)
end
|
|
403
|
+
|
|
404
|
+
# Calculate literal state index
|
|
405
|
+
# XZ Utils literal_subcoder formula (from lzma_common.h:141-143):
|
|
406
|
+
# ((probs) + 3 * (((((pos) << 8) + (prev_byte)) & (literal_mask)) << (lc)))
|
|
407
|
+
# where literal_mask = (0x100 << lp) - (0x100 >> lc)
|
|
408
|
+
#
|
|
409
|
+
# The key insight is that (pos << 8) + prev_byte is computed FIRST,
|
|
410
|
+
# then masked, THEN shifted by lc. This is different from our old formula
|
|
411
|
+
# which added pos_part and prev_part separately.
|
|
412
|
+
#
|
|
413
|
+
# IMPORTANT: The literal_subcoder macro returns:
|
|
414
|
+
# probs + 3 * context_value_shifted
|
|
415
|
+
# where context_value_shifted = context_value << lc
|
|
416
|
+
#
|
|
417
|
+
# For our implementation, we return context_value (unshifted) so that
|
|
418
|
+
# the literal encoder can calculate the correct offset: 3 * context_value
|
|
419
|
+
#
|
|
420
|
+
# This creates (1 << (lc + lp)) unique contexts
|
|
421
|
+
#
|
|
422
|
+
# @return [Integer] Masked literal context value, not shifted by lc (for lc=3, lp=0: a multiple of 32 in 0..224)
|
|
423
|
+
def calculate_literal_state
  # Last byte placed in the dictionary, or 0 when nothing has been encoded.
  prev_byte = @dict.empty? ? 0 : @dict[-1].ord

  # literal_mask = (0x100 << lp) - (0x100 >> lc); e.g. lc=3, lp=0 => 0xE0.
  literal_mask = (0x100 << @lp) - (0x100 >> @lc)

  # Position and previous byte are combined first, then masked as a whole.
  combined = (@pos << 8) + prev_byte
  combined & literal_mask
end
|
|
436
|
+
|
|
437
|
+
# Write LZMA header
|
|
438
|
+
#
|
|
439
|
+
# SDK header format:
|
|
440
|
+
# - Property byte: (lc + lp*9 + pb*45)
|
|
441
|
+
# - Dictionary size: 4 bytes little-endian
|
|
442
|
+
# - Uncompressed size: 8 bytes (0xFF for unknown size)
|
|
443
|
+
#
|
|
444
|
+
# @param uncompressed_size [Integer] Original data size
|
|
445
|
+
# @return [void]
|
|
446
|
+
def write_header(_uncompressed_size)
  # Properties byte packs the three context settings: lc + lp*9 + pb*45.
  props = @lc + (@lp * 9) + (@pb * 45)

  # Dictionary size as four bytes, least-significant byte first.
  dict_size_bytes = Array.new(4) { |index| (@dict_size >> (index * 8)) & 0xFF }

  [props, *dict_size_bytes].each { |value| @output.putc(value) }

  # Uncompressed size field: eight 0xFF bytes, the "unknown size" marker.
  # The argument is deliberately ignored.
  8.times { @output.putc(0xFF) }
end
|
|
461
|
+
end
|
|
462
|
+
end
|
|
463
|
+
end
|
|
464
|
+
end
|
|
465
|
+
end
|