omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Algorithms
|
|
25
|
+
class LZMA < Algorithm
|
|
26
|
+
# Range Encoder ported from XZ Utils rangecoder/range_encoder.h
|
|
27
|
+
#
|
|
28
|
+
# This class implements binary range coding, which is the core
|
|
29
|
+
# compression algorithm for LZMA. Range coding is a form of
|
|
30
|
+
# arithmetic coding that encodes bits into a compressed bitstream
|
|
31
|
+
# using probability models.
|
|
32
|
+
#
|
|
33
|
+
# The encoder maintains a range [low, low+range) and narrows this
|
|
34
|
+
# range as bits are encoded. When the range becomes too small, it
|
|
35
|
+
# is normalized and output bytes are produced.
|
|
36
|
+
#
|
|
37
|
+
# Ported from XZ Utils src/liblzma/rangecoder/range_encoder.h
|
|
38
|
+
class XZRangeEncoder
|
|
39
|
+
TOP_VALUE = 1 << 24
|
|
40
|
+
SHIFT_BITS = 8
|
|
41
|
+
BIT_MODEL_TOTAL_BITS = 11
|
|
42
|
+
BIT_MODEL_TOTAL = 1 << BIT_MODEL_TOTAL_BITS
|
|
43
|
+
|
|
44
|
+
attr_reader :cache, :range, :low
|
|
45
|
+
|
|
46
|
+
# Initialize a new range encoder
|
|
47
|
+
#
|
|
48
|
+
# @param output [IO] Output stream for compressed data
|
|
49
|
+
def initialize(output)
  # Destination object kept for later writes by the rest of the class.
  @output = output

  # Coder state: the range starts with all 32 bits set and the low
  # bound starts at zero.
  @range = 0xFFFF_FFFF
  @low = 0

  # Carry/cache bookkeeping; starts as one cached byte with value 0.
  @cache_size = 1
  @cache = 0
end
|
|
56
|
+
|
|
57
|
+
# Encode a single bit using probability model
|
|
58
|
+
#
|
|
59
|
+
# This method encodes a bit (0 or 1) using an adaptive probability
|
|
60
|
+
# model. The probability model is updated after encoding to adapt
|
|
61
|
+
# to the input data statistics.
|
|
62
|
+
#
|
|
63
|
+
# Ported from XZ Utils range_encoder.c rc_bit()
|
|
64
|
+
#
|
|
65
|
+
# @param model [BitModel] Probability model for this bit
|
|
66
|
+
# @param bit [Integer] Bit value to encode (0 or 1)
|
|
67
|
+
# @return [void]
|
|
68
|
+
def encode_bit(model, bit)
  # Size of the sub-range assigned to a 0 bit: the range scaled down by
  # BIT_MODEL_TOTAL_BITS and multiplied by the model's probability.
  scaled_range = @range >> BIT_MODEL_TOTAL_BITS
  zero_span = scaled_range * model.probability

  if bit.zero?
    # A 0 bit keeps only the lower sub-range; low is untouched.
    @range = zero_span
  else
    # Any other bit skips past the lower sub-range and keeps the rest.
    @range -= zero_span
    @low += zero_span
  end

  # Renormalize once the range has dropped below TOP_VALUE.
  normalize! unless @range >= TOP_VALUE

  # Let the model adapt to the bit just coded; its result is returned.
  model.update(bit)
end
|
|
85
|
+
|
|
86
|
+
# Encode multiple bits as a bittree
|
|
87
|
+
#
|
|
88
|
+
# A bittree is a binary tree where each node has a probability model.
|
|
89
|
+
# This method encodes a value by traversing the tree from the root,
|
|
90
|
+
# encoding the bit at each node and following the corresponding branch.
|
|
91
|
+
#
|
|
92
|
+
# Ported from XZ Utils range_encoder.c rc_bittree()
|
|
93
|
+
#
|
|
94
|
+
# @param models [Array<BitModel>] Array of probability models for tree nodes
|
|
95
|
+
# @param num_bits [Integer] Number of bits to encode
|
|
96
|
+
# @param value [Integer] Value to encode
|
|
97
|
+
# @return [void]
|
|
98
|
+
def encode_bittree(models, num_bits, value)
  # Tree nodes are numbered from 1 (the root); the models array is 0-based.
  node = 1

  # Walk from the most significant bit down to bit 0.
  (num_bits - 1).downto(0) do |shift|
    branch = (value >> shift) & 1
    encode_bit(models[node - 1], branch)
    node = (node << 1) | branch
  end

  nil
end
|
|
109
|
+
|
|
110
|
+
# Encode multiple bits as a reverse bittree
|
|
111
|
+
#
|
|
112
|
+
# Similar to encode_bittree but processes bits in reverse order.
|
|
113
|
+
# This is used for certain LZMA encoding operations.
|
|
114
|
+
#
|
|
115
|
+
# Ported from XZ Utils range_encoder.c rc_bittree_reverse()
|
|
116
|
+
#
|
|
117
|
+
# @param models [Array<BitModel>] Array of probability models for tree nodes
|
|
118
|
+
# @param num_bits [Integer] Number of bits to encode
|
|
119
|
+
# @param value [Integer] Value to encode
|
|
120
|
+
# @return [void]
|
|
121
|
+
def encode_bittree_reverse(models, num_bits, value)
  # Tree nodes are numbered from 1 (the root); the models array is 0-based.
  node = 1

  # Walk from bit 0 upwards, i.e. least significant bit first.
  num_bits.times do |shift|
    branch = (value >> shift) & 1
    encode_bit(models[node - 1], branch)
    node = (node << 1) | branch
  end

  nil
end
|
|
132
|
+
|
|
133
|
+
# Encode a direct bit (without probability model)
|
|
134
|
+
#
|
|
135
|
+
# This method encodes a bit with fixed 0.5 probability.
|
|
136
|
+
# Used for encoding values where no adaptive model is available.
|
|
137
|
+
#
|
|
138
|
+
# Ported from XZ Utils range_encoder.c rc_direct()
|
|
139
|
+
#
|
|
140
|
+
# @param value [Integer] Value to encode (0 or 1)
|
|
141
|
+
# @return [void]
|
|
142
|
+
def encode_direct(value)
  # A direct bit splits the interval into two equal halves.
  half = @range >> 1
  @range = half

  # A non-zero bit selects the upper half.
  @low += half unless value == 0

  normalize! if @range < TOP_VALUE
end
|
|
147
|
+
|
|
148
|
+
# Flush pending data to output stream
|
|
149
|
+
#
|
|
150
|
+
# This method flushes any remaining data in the range encoder
|
|
151
|
+
# to the output stream. This must be called before the encoder
|
|
152
|
+
# is discarded.
|
|
153
|
+
#
|
|
154
|
+
# Ported from XZ Utils range_encoder.c rc_flush()
|
|
155
|
+
#
|
|
156
|
+
# @return [void]
|
|
157
|
+
def flush!
  # Always shift five times: one shift for the cached byte plus one for each
  # of the four bytes still held in @low (XZ Utils rc_flush does the same).
  5.times { shift_low }
end

private

# Normalize the range encoder state
#
# When the range has dropped below TOP_VALUE it is widened by
# SHIFT_BITS and one byte of @low is moved towards the output.
#
# Ported from XZ Utils range_encoder.h (normalization step of rc_encode)
#
# @return [void]
def normalize!
  return unless @range < TOP_VALUE

  @range <<= SHIFT_BITS
  shift_low
end

# Shift the top byte out of @low, resolving carries
#
# @cache holds the most recent byte that has not been written yet and
# @cache_size counts that byte plus any 0xFF bytes queued behind it.
# These bytes can only be written once it is known whether a carry
# out of bit 31 of @low will ripple through them:
#
# - top byte of @low is not 0xFF, or a carry is present: the pending bytes
#   are final. Write @cache (plus carry) followed by the queued 0xFF bytes
#   (plus carry), then cache the new top byte.
# - top byte is 0xFF and no carry: the byte is queued by the counter
#   increment below and nothing is written yet.
#
# In both cases the pending count grows by one and @low is shifted left
# by one byte, exactly as in XZ Utils rc_shift_low:
#   ++rc->cache_size; rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS;
#
# Ported from XZ Utils liblzma/rangecoder/range_encoder.h rc_shift_low()
#
# @return [void]
def shift_low
  low32 = @low & 0xFFFFFFFF
  carry = (@low >> 32) & 0xFF

  if low32 < 0xFF000000 || carry != 0
    pending = @cache
    while @cache_size.positive?
      @output.putc((pending + carry) & 0xFF)
      # Every byte after the first pending one is a queued 0xFF.
      pending = 0xFF
      @cache_size -= 1
    end
    @cache = (low32 >> 24) & 0xFF
  end

  # Unconditional: either the freshly cached byte or one more queued 0xFF
  # is now pending. Skipping this after a write would leave the counter at
  # zero and the cached byte would never reach the output.
  @cache_size += 1
  @low = (low32 & 0x00FFFFFF) << SHIFT_BITS
end
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Direct port of XZ Utils range encoder to Ruby
|
|
6
|
+
# Based on: xz/src/liblzma/rangecoder/range_encoder.h
|
|
7
|
+
|
|
8
|
+
require_relative "constants"
|
|
9
|
+
|
|
10
|
+
module Omnizip
|
|
11
|
+
module Algorithms
|
|
12
|
+
class LZMA < Algorithm
|
|
13
|
+
# XZ Utils-compatible range encoder (direct port)
|
|
14
|
+
#
|
|
15
|
+
# This is a direct port of the XZ Utils range encoder implementation
|
|
16
|
+
# to ensure exact algorithmic compatibility with XZ Utils output.
|
|
17
|
+
class XzRangeEncoder
  include Constants

  # Range encoder constants (matching XZ Utils range_common.h)
  SHIFT_BITS = 8 # RC_SHIFT_BITS
  TOP_BITS = 24 # RC_TOP_BITS
  TOP = 0x01000000 # 2^24
  BIT_MODEL_TOTAL_BITS = 11
  BIT_MODEL_TOTAL = 2048 # 2^11

  # Symbol types (matching XZ Utils enum)
  RC_BIT_0 = 0
  RC_BIT_1 = 1
  RC_DIRECT_0 = 2
  RC_DIRECT_1 = 3
  RC_FLUSH = 4

  # Maximum symbols that can be queued
  RC_SYMBOLS_MAX = 53

  attr_reader :out_total, :count, :low, :range, :cache, :cache_size

  # Initialize the range encoder
  #
  # @param output_stream [IO] The output stream for encoded bytes
  def initialize(output_stream)
    @stream = output_stream
    reset
  end

  # Reset encoder to initial state (matches XZ Utils rc_reset)
  #
  # This also zeroes out_total, so bytes_for_decode only counts bytes
  # emitted since the most recent reset.
  #
  # @return [void]
  def reset
    @low = 0
    @cache_size = 1 # XZ Utils starts with one pending cache byte, not zero
    @range = 0xFFFFFFFF
    @cache = 0
    @out_total = 0
    @count = 0
    @pos = 0
    @symbols = Array.new(RC_SYMBOLS_MAX, 0)
    @probs = Array.new(RC_SYMBOLS_MAX, nil)
  end

  # Forget pending symbols (matches XZ Utils rc_forget)
  #
  # @raise [RuntimeError] if an encode pass is partially complete
  # @return [void]
  def forget
    raise "Cannot forget while encoding" if @pos != 0

    @count = 0
  end

  # Queue a bit for encoding (matches XZ Utils rc_bit)
  #
  # @param prob [Probability] Probability model (must respond to
  #   #value and #value=; it is updated later, during #encode)
  # @param bit [Integer] Bit value (0 or 1)
  # @raise [RuntimeError] if the symbol queue is full
  def bit(prob, bit)
    enqueue(bit, prob)
  end

  # Queue bittree encoding (matches XZ Utils rc_bittree)
  #
  # Bits are queued most significant first; the model for each bit is
  # probs[node], where node starts at 1 and follows the bits queued so far.
  #
  # @param probs [Array<Probability>] Probability array
  # @param bit_count [Integer] Number of bits
  # @param symbol [Integer] Symbol to encode
  def bittree(probs, bit_count, symbol)
    node = 1

    (bit_count - 1).downto(0) do |shift|
      branch = (symbol >> shift) & 1
      bit(probs[node], branch)
      node = (node << 1) | branch
    end
  end

  # Queue bittree encoding in reverse (matches XZ Utils rc_bittree_reverse)
  #
  # Same as #bittree, but bits are queued least significant first.
  #
  # @param probs [Array<Probability>] Probability array
  # @param bit_count [Integer] Number of bits
  # @param symbol [Integer] Symbol to encode
  def bittree_reverse(probs, bit_count, symbol)
    node = 1

    bit_count.times do
      branch = symbol & 1
      symbol >>= 1
      bit(probs[node], branch)
      node = (node << 1) | branch
    end
  end

  # Queue direct bits (matches XZ Utils rc_direct)
  #
  # Bits are queued most significant first and carry no probability model.
  #
  # @param value [Integer] Value to encode
  # @param bit_count [Integer] Number of bits
  # @raise [RuntimeError] if the symbol queue is full
  def direct(value, bit_count)
    (bit_count - 1).downto(0) do |shift|
      enqueue(RC_DIRECT_0 | ((value >> shift) & 1), nil)
    end
  end

  # Queue flush operation (matches XZ Utils rc_flush)
  #
  # Queues five RC_FLUSH symbols, one per byte that #encode has to push out.
  #
  # @raise [RuntimeError] if the symbol queue is full
  def flush
    debug_trace { "[FLUSH] Adding 5 RC_FLUSH symbols, @count before=#{@count}" }
    5.times { enqueue(RC_FLUSH, nil) }
    debug_trace { "[FLUSH] @count after=#{@count}" }
  end

  # Get number of pending bytes (matches XZ Utils rc_pending)
  #
  # @return [Integer] Number of pending output bytes
  def pending
    @cache_size + 5 - 1
  end

  # Check if no symbols are queued
  #
  # @return [Boolean] True if no symbols queued
  def none?
    @count.zero?
  end

  # Encode all queued symbols to output (matches XZ Utils rc_encode)
  #
  # When this returns true the queue position is kept, so calling it
  # again resumes at the symbol that could not be completed.
  #
  # @param out [IO,String] Output buffer
  # @param out_pos [IntegerRef] Current output position
  # @param out_size [Integer] Output buffer size
  # @return [Boolean] True if output buffer filled before encoding complete
  def encode(out, out_pos, out_size)
    raise "Symbol buffer overflow" if @count > RC_SYMBOLS_MAX

    debug_trace { "[ENCODE] Start: @count=#{@count} @pos=#{@pos} @out_total=#{@out_total}" }

    while @pos < @count
      # Normalize before each symbol, as XZ Utils does.
      if @range < TOP
        return true if shift_low(out, out_pos, out_size)

        @range <<= SHIFT_BITS
      end

      case @symbols[@pos]
      when RC_BIT_0
        prob = @probs[@pos]
        # XZ Utils: rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * prob
        @range = (@range >> BIT_MODEL_TOTAL_BITS) * prob.value
        # XZ Utils: prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS
        prob.value += (BIT_MODEL_TOTAL - prob.value) >> MOVE_BITS

      when RC_BIT_1
        prob = @probs[@pos]
        # XZ Utils: bound = prob * (rc->range >> RC_BIT_MODEL_TOTAL_BITS)
        bound = prob.value * (@range >> BIT_MODEL_TOTAL_BITS)
        @low += bound
        @range -= bound
        # XZ Utils: prob -= prob >> RC_MOVE_BITS
        prob.value -= prob.value >> MOVE_BITS

      when RC_DIRECT_0
        @range >>= 1

      when RC_DIRECT_1
        @range >>= 1
        @low += @range

      when RC_FLUSH
        # Prevent further normalizations (XZ Utils behavior)
        @range = 0xFFFFFFFF

        debug_trace { "[ENCODE] RC_FLUSH: @pos=#{@pos} @count=#{@count}" }

        # One shift_low per remaining queued symbol (do-while in XZ Utils).
        iteration = 0
        loop do
          iteration += 1
          debug_trace { "[ENCODE] RC_FLUSH iteration #{iteration}: @pos=#{@pos}" }
          return true if shift_low(out, out_pos, out_size)

          debug_trace { "[ENCODE] After shift_low: @pos=#{@pos}" }
          @pos += 1
          break unless @pos < @count
        end

        debug_trace { "[ENCODE] After RC_FLUSH loop: total #{iteration} iterations" }

        # Reset so encoding can continue afterwards (matches XZ Utils).
        # reset already clears @pos and @count, so leave the symbol loop
        # without the trailing increment.
        reset
        break

      else
        raise "Unknown symbol type: #{@symbols[@pos]}"
      end

      @pos += 1
    end

    debug_trace { "[ENCODE] End: @count=#{@count} @pos=#{@pos} @out_total=#{@out_total}" }

    @count = 0
    @pos = 0

    false
  end

  # Shift low bytes to output (matches XZ Utils rc_shift_low)
  #
  # A String buffer is written in place with #setbyte at out_pos (so it
  # must already be at least that long); any other object receives the
  # byte through #putc.
  #
  # @param out [IO,String] Output buffer
  # @param out_pos [IntegerRef] Current output position
  # @param out_size [Integer] Output buffer size
  # @return [Boolean] True if output buffer filled
  def shift_low(out, out_pos, out_size)
    # XZ Utils: if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000) || (uint32_t)(rc->low >> 32) != 0)
    if (@low & 0xFFFFFFFF) < 0xFF000000 || (@low >> 32) != 0
      # XZ Utils: do { ... } while (--rc->cache_size != 0);
      while @cache_size.positive?
        return true if out_pos.value == out_size

        # XZ Utils: out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32)
        # The C store truncates to uint8_t; mask explicitly so a carry
        # into a pending 0xFF byte becomes 0x00 instead of 0x100.
        output_byte = (@cache + (@low >> 32)) & 0xFF

        if out.is_a?(String)
          out.setbyte(out_pos.value, output_byte)
        else
          out.putc(output_byte)
        end

        out_pos.value += 1
        @out_total += 1
        @cache = 0xFF

        @cache_size -= 1
      end

      # XZ Utils: rc->cache = (rc->low >> 24) & 0xFF
      @cache = (@low >> 24) & 0xFF
    end

    # XZ Utils: ++rc->cache_size; rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS
    @cache_size += 1
    @low = (@low & 0x00FFFFFF) << SHIFT_BITS

    false
  end

  # Adapter method: same as #direct (to match existing API)
  #
  # @param value [Integer] Value to encode
  # @param num_bits [Integer] Number of bits
  def queue_direct_bits(value, num_bits)
    direct(value, num_bits)
  end

  # Number of bytes written since the last reset
  #
  # Returns out_total, which #reset zeroes; that includes the reset that
  # #encode performs after processing RC_FLUSH symbols.
  #
  # @return [Integer] Bytes emitted since the last reset
  def bytes_for_decode
    @out_total
  end

  # Adapter names (to match existing API)
  alias queue_bit bit
  alias queue_bittree bittree
  alias queue_bittree_reverse bittree_reverse
  alias queue_flush flush
  alias encode_symbols encode

  private

  # Append one symbol and its optional probability model to the queue.
  def enqueue(symbol, prob)
    raise "Symbol buffer overflow" if @count >= RC_SYMBOLS_MAX

    @symbols[@count] = symbol
    @probs[@count] = prob
    @count += 1
  end

  # Write a trace line to stderr when the DEBUG environment variable is set.
  # stderr is used so that tracing cannot mix into data written to stdout.
  def debug_trace
    warn yield if ENV["DEBUG"]
  end
end
|
|
320
|
+
|
|
321
|
+
# Reference wrapper for integer (for out_pos parameter)
|
|
322
|
+
class IntRef
  # Wrap an integer so a callee can update it in place.
  #
  # @param val [Integer] Initial value
  def initialize(val)
    self.value = val
  end

  # @return [Integer] The wrapped value; callers read and reassign it
  attr_accessor :value
end
|
|
329
|
+
end
|
|
330
|
+
end
|
|
331
|
+
end
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Algorithms
|
|
5
|
+
class LZMA < Algorithm
|
|
6
|
+
# XZ Utils-compatible LZMA state machine
|
|
7
|
+
#
|
|
8
|
+
# Tracks encoding context via 12-state machine to predict
|
|
9
|
+
# optimal probability models for upcoming symbols.
|
|
10
|
+
#
|
|
11
|
+
# Based on: xz/src/liblzma/lzma/lzma_common.h
|
|
12
|
+
class XzState
  # 12 LZMA states (matching XZ Utils exactly)
  STATE_LIT_LIT = 0 # literal after literal
  STATE_MATCH_LIT_LIT = 1 # literal after literal after match
  STATE_REP_LIT_LIT = 2 # literal after literal after rep
  STATE_SHORTREP_LIT_LIT = 3 # literal after literal after shortrep
  STATE_MATCH_LIT = 4 # literal after match
  STATE_REP_LIT = 5 # literal after rep
  STATE_SHORTREP_LIT = 6 # literal after shortrep
  STATE_LIT_MATCH = 7 # match after literal
  STATE_LIT_LONGREP = 8 # longrep after literal
  STATE_LIT_SHORTREP = 9 # shortrep after literal
  STATE_NONLIT_MATCH = 10 # match after non-literal
  STATE_NONLIT_REP = 11 # rep after non-literal

  LIT_STATES = 7 # States 0-6 indicate previous was literal

  # State names for debugging
  STATE_NAMES = {
    STATE_LIT_LIT => "STATE_LIT_LIT",
    STATE_MATCH_LIT_LIT => "STATE_MATCH_LIT_LIT",
    STATE_REP_LIT_LIT => "STATE_REP_LIT_LIT",
    STATE_SHORTREP_LIT_LIT => "STATE_SHORTREP_LIT_LIT",
    STATE_MATCH_LIT => "STATE_MATCH_LIT",
    STATE_REP_LIT => "STATE_REP_LIT",
    STATE_SHORTREP_LIT => "STATE_SHORTREP_LIT",
    STATE_LIT_MATCH => "STATE_LIT_MATCH",
    STATE_LIT_LONGREP => "STATE_LIT_LONGREP",
    STATE_LIT_SHORTREP => "STATE_LIT_SHORTREP",
    STATE_NONLIT_MATCH => "STATE_NONLIT_MATCH",
    STATE_NONLIT_REP => "STATE_NONLIT_REP",
  }.freeze

  # @return [Integer] Current state number (0..11 when valid)
  attr_accessor :value

  # @param initial [Integer] Starting state
  def initialize(initial = STATE_LIT_LIT)
    @value = initial
  end

  # Update state after encoding literal
  # Matches XZ Utils update_literal() macro
  #
  # @return [Integer] The new state
  def update_literal
    previous = @value
    @value = if previous <= STATE_SHORTREP_LIT_LIT
               STATE_LIT_LIT
             elsif previous <= STATE_LIT_SHORTREP
               previous - 3
             else
               previous - 6
             end
    trace_transition("update_literal", previous)
    @value
  end

  # Update state after encoding match
  # Matches XZ Utils update_match() macro
  #
  # @return [Integer] The new state
  def update_match
    previous = @value
    @value = previous < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH
    trace_transition("update_match", previous)
    @value
  end

  # Update state after encoding long rep match
  # Matches XZ Utils update_long_rep() macro
  #
  # @return [Integer] The new state
  def update_long_rep
    @value = @value < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP
  end

  # Update state after encoding short rep (1 byte)
  # Matches XZ Utils update_short_rep() macro
  #
  # @return [Integer] The new state
  def update_short_rep
    @value = @value < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP
  end

  # Check if previous symbol was literal
  #
  # @return [Boolean]
  def literal_state?
    @value < LIT_STATES
  end

  # Create a copy of this state
  #
  # The copy is built through self.class so a subclass instance is
  # duplicated as that subclass.
  #
  # @return [XzState]
  def dup
    self.class.new(@value)
  end

  # Reset to initial state
  #
  # @return [Integer] STATE_LIT_LIT
  def reset
    @value = STATE_LIT_LIT
  end

  # Check if state is valid
  #
  # @return [Boolean] True only for an Integer within 0..11
  def valid?
    @value.is_a?(Integer) && @value.between?(STATE_LIT_LIT, STATE_NONLIT_REP)
  end

  # String representation for debugging
  #
  # @return [String] The state constant name, or "INVALID(<value>)"
  def to_s
    STATE_NAMES[@value] || "INVALID(#{@value})"
  end

  private

  # Print a state transition to stderr when LZMA_DEBUG is set.
  def trace_transition(label, previous)
    return unless ENV["LZMA_DEBUG"]

    warn "DEBUG: #{label} - state: #{previous} → #{@value}"
  end
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
end
|