omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "match_finder"
|
|
24
|
+
require_relative "constants"
|
|
25
|
+
|
|
26
|
+
module Omnizip
|
|
27
|
+
module Algorithms
|
|
28
|
+
class LZMA < Algorithm
|
|
29
|
+
# XZ Utils-compatible match finder adapter
|
|
30
|
+
#
|
|
31
|
+
# Wraps existing MatchFinder to provide XZ Utils interface with:
|
|
32
|
+
# - Cursor-based position tracking
|
|
33
|
+
# - Multiple match finding (not just longest)
|
|
34
|
+
# - Skip and lookahead operations
|
|
35
|
+
#
|
|
36
|
+
# Based on: xz/src/liblzma/lz/lz_encoder_mf.c
|
|
37
|
+
class XzMatchFinderAdapter
|
|
38
|
+
include Constants
|
|
39
|
+
|
|
40
|
+
# Match structure matching XZ Utils
|
|
41
|
+
Match = Struct.new(:len, :dist, keyword_init: true) do
  # Compact textual form of a match, e.g. "Match(len=3, dist=5)".
  #
  # @return [String]
  def to_s
    # Struct#to_a yields the members in declaration order: len, then dist.
    length, distance = to_a
    "Match(len=#{length}, dist=#{distance})"
  end
end
|
|
46
|
+
|
|
47
|
+
attr_reader :matches, :longest_len, :pos
|
|
48
|
+
|
|
49
|
+
# Initialize match finder adapter
|
|
50
|
+
#
|
|
51
|
+
# @param data [String, Array<Integer>] Input data
|
|
52
|
+
# @param dict_size [Integer] Dictionary size (default 8MB for XZ)
|
|
53
|
+
# @param nice_len [Integer] Nice match length (default 32)
|
|
54
|
+
def initialize(data, dict_size: 1 << 23, nice_len: 32)
  # Caller-supplied tunables, stored unchanged.
  @dict_size = dict_size
  @nice_len = nice_len

  # A String is expanded into its byte values; any other input is kept
  # as given (no copy is made).
  @data =
    if data.is_a?(String)
      data.bytes
    else
      data
    end

  # Cursor into @data, plus the results of the most recent search.
  @pos = 0
  @longest_len = 0
  @matches = []

  # Looked up by hash value in find_matches to obtain candidate positions.
  @hash_table = {}
end
|
|
67
|
+
|
|
68
|
+
# Find all matches at current position
|
|
69
|
+
#
|
|
70
|
+
# Finds multiple matches of different lengths, not just the longest.
|
|
71
|
+
# Results stored in @matches array, longest length in @longest_len.
|
|
72
|
+
#
|
|
73
|
+
# @return [Integer] Longest match length (0 if no matches)
|
|
74
|
+
def find_matches
  # Every call starts from a clean result set.
  @matches.clear
  @longest_len = 0

  # Nothing to report at/after the end of input or when fewer than
  # MATCH_LEN_MIN bytes remain.
  out_of_input = @pos >= @data.size || available < MATCH_LEN_MIN

  # The first two positions never yield matches. Per the original author's
  # note this keeps distances within what the decoder accepts (it requires
  # its dictionary fill to exceed the match distance).
  # NOTE(review): this early exit also skips the hash-table update below.
  return 0 if out_of_input || @pos < 2

  bucket_key = compute_hash
  candidates = @hash_table[bucket_key] || []

  # match length => smallest distance seen for that length
  nearest_by_len = {}

  # Walk the candidates from the end of the bucket backwards.
  candidates.reverse_each do |candidate|
    dist = @pos - candidate
    break if dist > @dict_size # beyond the dictionary window: stop scanning
    next if dist.zero?         # the current position itself is not a match

    len = calculate_match_length(candidate)
    next if len < MATCH_LEN_MIN

    known = nearest_by_len[len]
    nearest_by_len[len] = dist unless known && known <= dist

    @longest_len = [@longest_len, len].max

    # A match of at least nice_len is good enough; stop searching.
    break if len >= @nice_len
  end

  # Publish one Match per distinct length, in ascending length order.
  nearest_by_len.sort.each do |len, dist|
    @matches << Match.new(len: len, dist: dist)
  end

  # Record the current position so later searches can find it.
  update_hash(bucket_key, @pos)

  @longest_len
end
|
|
130
|
+
|
|
131
|
+
# Skip n bytes without finding matches
|
|
132
|
+
#
|
|
133
|
+
# Advances position and updates hash tables but doesn't search for matches.
|
|
134
|
+
# Used for rep matches where we already know what to encode.
|
|
135
|
+
#
|
|
136
|
+
# @param n [Integer] Number of bytes to skip
|
|
137
|
+
def skip(n)
  n.times do
    # Stop (returning nil) as soon as the end of the input is reached.
    return nil unless @pos < @data.size

    # Index the position being stepped over, then advance past it.
    update_hash(compute_hash, @pos)
    @pos += 1
  end
end
|
|
146
|
+
|
|
147
|
+
# Move position forward by one byte
|
|
148
|
+
def move_pos
  # Advance the cursor by one byte; the new position is the return value.
  @pos = @pos + 1
end
|
|
151
|
+
|
|
152
|
+
# Bytes available from current position
|
|
153
|
+
#
|
|
154
|
+
# @return [Integer] Number of bytes remaining
|
|
155
|
+
def available
  # Total input length minus the bytes already consumed.
  total = @data.size
  total - @pos
end
|
|
158
|
+
|
|
159
|
+
# Get current byte at position
|
|
160
|
+
#
|
|
161
|
+
# @return [Integer, nil] Byte value or nil if at end
|
|
162
|
+
def current_byte
  # nil signals that the cursor has reached or passed the end of the input.
  @pos < @data.size ? @data[@pos] : nil
end
|
|
167
|
+
|
|
168
|
+
# Get byte at offset from current position
|
|
169
|
+
#
|
|
170
|
+
# @param offset [Integer] Offset from current position (can be negative)
|
|
171
|
+
# @return [Integer] Byte value (0 if out of bounds)
|
|
172
|
+
def get_byte(offset)
  index = @pos + offset

  # Reads before the start or at/after the end of the input yield 0.
  index.between?(0, @data.size - 1) ? @data[index] : 0
end
|
|
178
|
+
|
|
179
|
+
# Reset match finder to beginning
|
|
180
|
+
def reset
  # Rewind the cursor and forget the previous search result.
  @pos = @longest_len = 0
  @matches.clear

  # Drop every indexed position; clearing last keeps the hash table as the
  # method's return value.
  @hash_table.clear
end
|
|
186
|
+
|
|
187
|
+
private
|
|
188
|
+
|
|
189
|
+
# Compute hash value for sequence at current position
|
|
190
|
+
#
|
|
191
|
+
# @return [Integer] Hash value
|
|
192
|
+
def compute_hash
  # Three bytes are needed; with fewer left, every position shares bucket 0.
  last_index = @pos + 2
  return 0 unless last_index < @data.size

  first = @data[@pos]
  second = @data[@pos + 1]
  third = @data[last_index]

  # Pack the three bytes into one integer key.
  (first << 16) ^ (second << 8) ^ third
end
|
|
197
|
+
|
|
198
|
+
# Calculate match length between current position and previous position
|
|
199
|
+
#
|
|
200
|
+
# @param prev_pos [Integer] Previous position to compare against
|
|
201
|
+
# @return [Integer] Length of match
|
|
202
|
+
def calculate_match_length(prev_pos)
  # Never compare past the end of the input or beyond nice_len.
  limit = [@nice_len, available].min
  matched = 0

  # Extend the match byte by byte until the limit or the first mismatch.
  until matched >= limit || @data[@pos + matched] != @data[prev_pos + matched]
    matched += 1
  end

  matched
end
|
|
212
|
+
|
|
213
|
+
# Update hash table with position
|
|
214
|
+
#
|
|
215
|
+
# @param hash_val [Integer] Hash value
|
|
216
|
+
# @param pos [Integer] Position to add
|
|
217
|
+
def update_hash(hash_val, pos)
  # Fetch the chain for this hash, creating it on first use.
  chain = (@hash_table[hash_val] ||= [])
  chain.push(pos)

  # Bound the chain at 1024 entries by discarding the oldest position.
  chain.shift if chain.size > 1024
end
|
|
224
|
+
end
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
end
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "constants"
|
|
4
|
+
|
|
5
|
+
module Omnizip
|
|
6
|
+
module Algorithms
|
|
7
|
+
class LZMA < Algorithm
|
|
8
|
+
# XZ Utils-compatible price calculator
|
|
9
|
+
#
|
|
10
|
+
# Calculates the cost (in price units) of encoding symbols using
|
|
11
|
+
# probability models. Prices are based on logarithmic probabilities:
|
|
12
|
+
# price = -log2(probability) * scale_factor
|
|
13
|
+
#
|
|
14
|
+
# Uses precomputed tables for efficiency, matching XZ Utils exactly.
|
|
15
|
+
#
|
|
16
|
+
# Based on: xz/src/liblzma/rangecoder/price.h
|
|
17
|
+
class XzPriceCalculator
  include Constants

  # Fixed-point precision of a price: encoding exactly one bit of
  # information costs 1 << PRICE_SHIFT_BITS (= PRICE_SCALE) units.
  PRICE_SHIFT_BITS = 4
  PRICE_SCALE = 1 << PRICE_SHIFT_BITS

  # Probability range, 2^11 = 2048 (defined locally for clarity).
  BIT_MODEL_TOTAL_LOCAL = 0x800
  BIT_MODEL_TOTAL_BITS = 11

  # Number of probability buckets in the price table (2048 >> 4 = 128).
  PRICE_TABLE_SIZE = BIT_MODEL_TOTAL_LOCAL >> PRICE_SHIFT_BITS

  class << self
    # Calculate price for encoding a single bit
    #
    # For bit 1 the probability is mirrored with an XOR against
    # BIT_MODEL_TOTAL - 1 (as xz's rc_bit_1_price does). Unlike
    # BIT_MODEL_TOTAL - prob, the XOR keeps the table index inside
    # 0...PRICE_TABLE_SIZE for every prob in 0...BIT_MODEL_TOTAL.
    #
    # @param prob [Integer] Probability model value (0...BIT_MODEL_TOTAL)
    # @param bit [Integer] Bit value (0 or 1)
    # @return [Integer] Price in price units
    def bit_price(prob, bit)
      mirrored = bit.zero? ? prob : prob ^ (BIT_MODEL_TOTAL_LOCAL - 1)
      PRICE_TABLE[mirrored >> PRICE_SHIFT_BITS]
    end

    # Calculate price for encoding a symbol using bit tree
    #
    # Bits are priced from MSB to LSB; the model for each bit is indexed
    # by the already-coded prefix of the symbol (with a leading 1).
    #
    # @param probs [Array<BitModel>] Probability models for tree
    #   (each must respond to #probability)
    # @param num_bits [Integer] Number of bits in symbol
    # @param symbol [Integer] Symbol value to encode
    # @return [Integer] Total price in price units
    def bittree_price(probs, num_bits, symbol)
      # The sentinel bit makes the prefix of the top bit equal to 1.
      symbol |= (1 << num_bits)

      (num_bits - 1).downto(0).sum do |i|
        prefix = symbol >> (i + 1)
        bit_price(probs[prefix].probability, (symbol >> i) & 1)
      end
    end

    # Calculate price for encoding a symbol using reverse bit tree
    #
    # Bits are priced from LSB to MSB; the model index starts at 1 and
    # is extended with every bit that has been priced.
    #
    # @param probs [Array<BitModel>] Probability models for tree
    #   (each must respond to #probability)
    # @param num_bits [Integer] Number of bits in symbol
    # @param symbol [Integer] Symbol value to encode
    # @return [Integer] Total price in price units
    def bittree_reverse_price(probs, num_bits, symbol)
      price = 0
      model_idx = 1

      num_bits.times do |i|
        bit = (symbol >> i) & 1
        price += bit_price(probs[model_idx].probability, bit)
        model_idx = (model_idx << 1) | bit
      end

      price
    end

    # Calculate price for direct bits (uniform distribution)
    #
    # Direct bits use no probability model, so each one costs exactly
    # one bit, i.e. PRICE_SCALE units. This is the same price the table
    # yields for a probability of one half, which keeps direct bits and
    # modelled bits on one scale (xz: bits << RC_BIT_PRICE_SHIFT_BITS).
    #
    # @param num_bits [Integer] Number of direct bits
    # @return [Integer] Total price in price units
    def direct_price(num_bits)
      num_bits << PRICE_SHIFT_BITS
    end

    # Precompute logarithmic price table using Math.log2
    #
    # Entry i covers probabilities i*16 .. i*16+15 and is priced at the
    # midpoint of that bucket:
    #   price[i] = round(-log2((i*16 + 8) / BIT_MODEL_TOTAL) * PRICE_SCALE)
    # Entry 0 is not computed; it is pinned to a fixed ceiling price.
    #
    # @return [Array<Integer>] Precomputed price table
    def precompute_price_table
      Array.new(PRICE_TABLE_SIZE) do |i|
        if i.zero?
          15 << PRICE_SHIFT_BITS
        else
          midpoint = (i << PRICE_SHIFT_BITS) + (PRICE_SCALE >> 1)
          probability = midpoint.to_f / BIT_MODEL_TOTAL_LOCAL
          (-Math.log2(probability) * PRICE_SCALE).round
        end
      end
    end
  end

  # Precomputed logarithmic price table, indexed by prob >> PRICE_SHIFT_BITS
  PRICE_TABLE = precompute_price_table.freeze

  # Instance methods for convenience; each delegates to the class method
  # of the same name.

  # @param prob [Integer] Probability value
  # @param bit [Integer] Bit value
  # @return [Integer] Price
  def bit_price(prob, bit)
    self.class.bit_price(prob, bit)
  end

  # @param probs [Array<BitModel>] Probability models
  # @param num_bits [Integer] Number of bits
  # @param symbol [Integer] Symbol value
  # @return [Integer] Price
  def bittree_price(probs, num_bits, symbol)
    self.class.bittree_price(probs, num_bits, symbol)
  end

  # @param probs [Array<BitModel>] Probability models
  # @param num_bits [Integer] Number of bits
  # @param symbol [Integer] Symbol value
  # @return [Integer] Price
  def bittree_reverse_price(probs, num_bits, symbol)
    self.class.bittree_reverse_price(probs, num_bits, symbol)
  end

  # @param num_bits [Integer] Number of direct bits
  # @return [Integer] Price
  def direct_price(num_bits)
    self.class.direct_price(num_bits)
  end
end
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "xz_buffered_range_encoder"
|
|
4
|
+
require_relative "constants"
|
|
5
|
+
|
|
6
|
+
module Omnizip
|
|
7
|
+
module Algorithms
|
|
8
|
+
class LZMA < Algorithm
|
|
9
|
+
# XZ Utils-compatible probability models
|
|
10
|
+
#
|
|
11
|
+
# Organizes all probability models used by LZMA encoder, matching
|
|
12
|
+
# XZ Utils structure exactly. All models start at probability 1024
|
|
13
|
+
# (BIT_MODEL_TOTAL / 2 = 0.5 probability).
|
|
14
|
+
#
|
|
15
|
+
# Uses XzBufferedRangeEncoder::Probability for mutable inline updates.
|
|
16
|
+
#
|
|
17
|
+
# Based on: xz/src/liblzma/lzma/lzma_encoder_private.h
|
|
18
|
+
class XzProbabilityModels
  include Constants

  # Literal models, match-type models, distance models and length encoders
  attr_reader :literal,
              :is_match, :is_rep, :is_rep0, :is_rep1, :is_rep2, :is_rep0_long,
              :dist_slot, :dist_special, :dist_align,
              :match_len_encoder, :rep_len_encoder

  # Build every probability model used by the encoder
  #
  # @param lc [Integer] Number of literal context bits (0-8)
  # @param lp [Integer] Number of literal position bits (0-4)
  # @param pb [Integer] Number of position bits (0-4)
  def initialize(lc, lp, pb)
    @lc = lc
    @lp = lp
    @pb = pb

    init_literal_models(lc, lp)
    init_match_models(pb)
    init_distance_models
    init_length_encoders(pb)
  end

  # Put every model (and both length encoders) back to its initial value
  def reset
    reset_literal_models
    reset_match_models
    reset_distance_models
    reset_length_encoders
  end

  private

  # Literal models are one flat array of 0x300 << (lc + lp) entries.
  def init_literal_models(lc, lp)
    @literal = model_row(0x300 << (lc + lp))
  end

  # Match-type models: is_match and is_rep0_long are indexed
  # [state][pos_state]; the remaining ones are indexed [state].
  def init_match_models(pb)
    pos_state_count = 1 << pb

    @is_match = model_grid(NUM_STATES, pos_state_count)
    @is_rep = model_row(NUM_STATES)
    @is_rep0 = model_row(NUM_STATES)
    @is_rep1 = model_row(NUM_STATES)
    @is_rep2 = model_row(NUM_STATES)
    @is_rep0_long = model_grid(NUM_STATES, pos_state_count)
  end

  # Distance models: dist_slot is indexed [len_to_pos_state][dist_slot],
  # dist_special and dist_align are flat arrays.
  def init_distance_models
    @dist_slot = model_grid(NUM_LEN_TO_POS_STATES, NUM_DIST_SLOTS)
    @dist_special = model_row(NUM_FULL_DISTANCES - START_POS_MODEL_INDEX)
    @dist_align = model_row(DIST_ALIGN_SIZE)
  end

  # One length encoder for normal matches, one for repeated matches.
  def init_length_encoders(pb)
    @match_len_encoder = LengthEncoder.new(pb)
    @rep_len_encoder = LengthEncoder.new(pb)
  end

  def reset_literal_models
    restore_row(@literal)
  end

  def reset_match_models
    restore_grid(@is_match)
    [@is_rep, @is_rep0, @is_rep1, @is_rep2].each { |row| restore_row(row) }
    restore_grid(@is_rep0_long)
  end

  def reset_distance_models
    restore_grid(@dist_slot)
    restore_row(@dist_special)
    restore_row(@dist_align)
  end

  def reset_length_encoders
    @match_len_encoder.reset
    @rep_len_encoder.reset
  end

  # Allocate a flat array of +count+ fresh probability models.
  def model_row(count)
    Array.new(count) { XzBufferedRangeEncoder::Probability.new }
  end

  # Allocate +rows+ independent rows of +columns+ fresh models each.
  def model_grid(rows, columns)
    Array.new(rows) { model_row(columns) }
  end

  # Set every model of a flat array back to BIT_MODEL_TOTAL / 2.
  def restore_row(row)
    row.each { |model| model.value = BIT_MODEL_TOTAL >> 1 }
  end

  # Set every model of a two-level array back to BIT_MODEL_TOTAL / 2.
  def restore_grid(grid)
    grid.each { |row| restore_row(row) }
  end
end
|
|
157
|
+
|
|
158
|
+
# Length encoder with probability models and price tables
|
|
159
|
+
#
|
|
160
|
+
# Encodes match lengths using a 3-tier structure:
|
|
161
|
+
# - Low: lengths 2-9 (choice=0, 3 bits)
|
|
162
|
+
# - Mid: lengths 10-17 (choice=1, choice2=0, 3 bits)
|
|
163
|
+
# - High: lengths 18-273 (choice=1, choice2=1, 8 bits)
|
|
164
|
+
class LengthEncoder
  include Constants

  attr_reader :choice, :choice2, :low, :mid, :high, :prices, :counters

  # Build the models, price tables and counters for one length coder
  #
  # @param pb [Integer] Number of position bits
  def initialize(pb)
    @pb = pb
    @num_pos_states = 1 << pb

    # Two choice bits select between the low, mid and high tiers.
    @choice = XzBufferedRangeEncoder::Probability.new
    @choice2 = XzBufferedRangeEncoder::Probability.new

    # Low and mid tiers keep a separate model set per position state.
    @low = Array.new(@num_pos_states) do
      Array.new(LEN_LOW_SYMBOLS) { XzBufferedRangeEncoder::Probability.new }
    end
    @mid = Array.new(@num_pos_states) do
      Array.new(LEN_MID_SYMBOLS) { XzBufferedRangeEncoder::Probability.new }
    end

    # The high tier is shared by all position states.
    @high = Array.new(LEN_HIGH_SYMBOLS) { XzBufferedRangeEncoder::Probability.new }

    # prices[pos_state][length - MATCH_LEN_MIN], all starting at 0
    price_slots = MATCH_LEN_MAX - MATCH_LEN_MIN + 1
    @prices = Array.new(@num_pos_states) { Array.new(price_slots, 0) }

    # One price-update counter per position state
    @counters = Array.new(@num_pos_states, 0)
  end

  # Return all models to BIT_MODEL_TOTAL / 2 and zero prices and counters
  def reset
    initial = BIT_MODEL_TOTAL >> 1

    @choice.value = initial
    @choice2.value = initial
    [@low, @mid].each do |tier|
      tier.each do |models|
        models.each { |model| model.value = initial }
      end
    end
    @high.each { |model| model.value = initial }

    @prices.each { |row| row.fill(0) }
    @counters.fill(0)
  end

  # Look up the cached price of a length
  #
  # @param pos_state [Integer] Position state (0 to 2^pb - 1)
  # @param length [Integer] Match length (2 to 273)
  # @return [Integer] Price in price units
  def get_price(pos_state, length)
    row = @prices[pos_state]
    row[length - MATCH_LEN_MIN]
  end

  # Store the price of a length
  #
  # @param pos_state [Integer] Position state
  # @param length [Integer] Match length
  # @param price [Integer] Price value
  def set_price(pos_state, length, price)
    row = @prices[pos_state]
    row[length - MATCH_LEN_MIN] = price
  end

  # Count a position state's counter down by one
  #
  # @param pos_state [Integer] Position state
  # @return [Boolean] True if counter reached zero (or went below)
  def decrement_counter(pos_state)
    remaining = @counters[pos_state] - 1
    @counters[pos_state] = remaining
    remaining <= 0
  end

  # Set a position state's counter to a given value
  #
  # @param pos_state [Integer] Position state
  # @param value [Integer] Counter value
  def reset_counter(pos_state, value)
    @counters[pos_state] = value
  end
end
|
|
259
|
+
end
|
|
260
|
+
end
|
|
261
|
+
end
|