omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "constants"
|
|
24
|
+
require_relative "bit_model"
|
|
25
|
+
|
|
26
|
+
module Omnizip
|
|
27
|
+
module Algorithms
|
|
28
|
+
class LZMA < Algorithm
|
|
29
|
+
# SDK-compatible distance encoder/decoder
|
|
30
|
+
#
|
|
31
|
+
# This class implements the LZMA SDK's distance encoding scheme:
|
|
32
|
+
# - Slot 0-3: Direct encoding (no extra bits)
|
|
33
|
+
# - Slot 4-13: Slot + 1-5 extra bits (reverse-tree coded via the position models)
|
|
34
|
+
# - Slot 14+: Slot + high bits sent as direct bits + low DIST_ALIGN_BITS bits via the align models
|
|
35
|
+
#
|
|
36
|
+
# The slot categorizes distances into ranges, and extra bits
|
|
37
|
+
# specify the exact position within that range.
|
|
38
|
+
class DistanceCoder
|
|
39
|
+
include Constants
|
|
40
|
+
|
|
41
|
+
# Initialize the distance coder
|
|
42
|
+
#
|
|
43
|
+
# @param num_len_to_pos_states [Integer] Number of length states for slot selection
|
|
44
|
+
def initialize(num_len_to_pos_states)
  @num_len_to_pos_states = num_len_to_pos_states

  # One array of slot models per length state; every array holds
  # 1 << (NUM_DIST_SLOT_BITS + 1) freshly created BitModel instances.
  slot_tree_size = 1 << (NUM_DIST_SLOT_BITS + 1)
  @slot_encoders = Array.new(num_len_to_pos_states) do
    Array.new(slot_tree_size) { BitModel.new }
  end

  # Models used by #encode for the extra bits of slots in the range
  # START_POS_MODEL_INDEX...END_POS_MODEL_INDEX.
  pos_model_count = NUM_FULL_DISTANCES - END_POS_MODEL_INDEX
  @pos_encoders = Array.new(pos_model_count) { BitModel.new }

  # Models used by #encode for the low DIST_ALIGN_BITS bits of slots at or
  # above END_POS_MODEL_INDEX.
  align_tree_size = 1 << (DIST_ALIGN_BITS + 1)
  @align_encoder = Array.new(align_tree_size) { BitModel.new }

  # Distance -> slot lookup table; allocated empty here and filled in by
  # init_slot_fast_table.
  @slot_fast = Array.new(DIST_SLOT_FAST_LIMIT)
  init_slot_fast_table
end
|
|
69
|
+
|
|
70
|
+
# Reset all probability models in place
|
|
71
|
+
#
|
|
72
|
+
# This method resets the bit models to their initial state.
|
|
73
|
+
# Called during state reset to reinitialize probability models.
|
|
74
|
+
#
|
|
75
|
+
# @return [void]
|
|
76
|
+
def reset_models
  # Tracing is opt-in: both environment variables must be set.
  tracing = ENV["DEBUG_RESET_MODELS"] && ENV["LZMA_DEBUG_DISTANCE"]

  if tracing
    puts " [DistanceCoder.reset_models] Resetting #{@slot_encoders.size} len_states, each with #{@slot_encoders[0]&.size || '?'} models"
  end

  # Reset the existing model objects in place (no reallocation): the slot
  # models of every length state first, then the position models, then the
  # align models.
  @slot_encoders.each { |slot_models| slot_models.each(&:reset) }
  [@pos_encoders, @align_encoder].each { |models| models.each(&:reset) }

  puts " [DistanceCoder.reset_models] Done resetting" if tracing
end
|
|
89
|
+
|
|
90
|
+
# Encode a match distance using SDK-compatible encoding
#
# The distance slot is written first through the slot tree selected by
# +len_state+. Slots below START_POS_MODEL_INDEX carry no extra bits. For
# larger slots the remainder (distance minus the slot's base value) is
# written either through a reverse bit tree over @pos_encoders, or as
# direct bits followed by DIST_ALIGN_BITS reverse-tree bits over
# @align_encoder.
#
# @param range_encoder [RangeEncoder] The range encoder
# @param distance [Integer] Distance value (already subtracted 1)
# @param len_state [Integer] Length state for slot selection
# @return [void]
def encode(range_encoder, distance, len_state)
  slot = get_dist_slot(distance)

  if ENV["LZMA_DEBUG_ENCODE"]
    puts "[DistanceCoder.encode] distance=#{distance} slot=#{slot} len_state=#{len_state}"
    puts "[DistanceCoder.encode] CALLING encode_tree with symbol=#{slot}"
  end

  encode_tree(range_encoder, @slot_encoders[len_state], slot, NUM_DIST_SLOT_BITS)

  # Small slots are fully described by the slot itself.
  return if slot < START_POS_MODEL_INDEX

  footer_bits = (slot >> 1) - 1
  base = (2 | (slot & 1)) << footer_bits
  remainder = distance - base

  if slot >= END_POS_MODEL_INDEX
    # High part goes out as raw direct bits, low part through the align tree.
    range_encoder.encode_direct_bits(remainder >> DIST_ALIGN_BITS,
                                     footer_bits - DIST_ALIGN_BITS)
    encode_reverse_tree(range_encoder, @align_encoder, 0, remainder,
                        DIST_ALIGN_BITS)
  else
    # The models for this slot start at base - slot - 1 inside @pos_encoders.
    encode_reverse_tree(range_encoder, @pos_encoders, base - slot - 1,
                        remainder, footer_bits)
  end
end
|
|
135
|
+
|
|
136
|
+
# Decode a match distance using SDK-compatible decoding
#
# The distance slot is read first through the slot tree selected by
# +len_state+ (NUM_DIST_SLOT_BITS bits). The slot then decides how the rest
# of the distance is read:
#
# * slot < START_POS_MODEL_INDEX: the slot itself is the distance.
# * slot < END_POS_MODEL_INDEX: the footer bits come from a reverse bit
#   tree over @pos_encoders and are added to the slot's base value.
# * otherwise: the high bits are read as direct bits seeded with
#   2 + (slot & 1), then DIST_ALIGN_BITS low bits are read from the reverse
#   tree over @align_encoder. The slot value is NOT added to the result.
#   (The original comments attribute this sequence to XZ Utils'
#   lzma_decoder.c: rc_direct followed by rc_bittree_rev4.)
#
# Tracing: nothing is printed unless ENV["LZMA_DEBUG_DISTANCE"] is set. The
# call-number windows below (320..330, 326, 25..35, 315..330) are leftovers
# from a specific debugging session and are preserved so trace output stays
# unchanged. $distance_decode_count is a process-wide call counter that is
# never reset here; decode_tree also reads it, so it is still maintained.
#
# @param range_decoder [RangeDecoder] The range decoder
# @param len_state [Integer] Length state for slot selection
# @return [Integer] Decoded distance value (before adding 1)
def decode(range_decoder, len_state)
  $distance_decode_count ||= 0
  call_index = $distance_decode_count

  # Read the environment once; every trace decision below depends on it.
  tracing = ENV["LZMA_DEBUG_DISTANCE"]

  if tracing
    puts " [DistanceCoder.decode ##{call_index}] START - len_state=#{len_state}"
    puts " BEFORE: range=#{range_decoder.range.inspect}, code=#{range_decoder.code.inspect}"
  end

  slot = decode_tree(range_decoder, @slot_encoders[len_state],
                     NUM_DIST_SLOT_BITS)

  if tracing
    puts " [DistanceCoder.decode ##{call_index}] len_state=#{len_state}, slot=#{slot}"
    puts " @slot_encoders[#{len_state}] object_id=#{@slot_encoders[len_state].object_id}"
  end

  trace_window = tracing && call_index.between?(320, 330)

  # Slots 0-3: no extra bits
  if slot < START_POS_MODEL_INDEX
    $distance_decode_count += 1
    puts " -> distance=#{slot}" if trace_window
    return slot
  end

  footer_bits = (slot >> 1) - 1

  # Slots below END_POS_MODEL_INDEX: reverse tree over the position models
  if slot < END_POS_MODEL_INDEX
    base = (2 | (slot & 1)) << footer_bits
    distance = base + decode_reverse_tree(range_decoder, @pos_encoders,
                                          base - slot - 1, footer_bits)
    $distance_decode_count += 1
    puts " -> distance=#{distance} (slot #{slot})" if trace_window
    return distance
  end

  # Remaining slots: direct bits built on top of 2 + (slot & 1), then the
  # aligned low bits.
  high_bits = range_decoder.decode_direct_bits_with_base(
    footer_bits - DIST_ALIGN_BITS, 2 + (slot & 1)
  )
  low_bits = decode_reverse_tree(range_decoder, @align_encoder, 0,
                                 DIST_ALIGN_BITS)
  puts " TRACE_326: low_bits=#{low_bits}" if tracing && call_index == 326

  distance = (high_bits << DIST_ALIGN_BITS) + low_bits
  $distance_decode_count += 1

  if tracing
    if call_index.between?(25, 35) || call_index.between?(315, 330)
      puts " -> slot=#{slot}, result_after_direct=#{distance >> DIST_ALIGN_BITS}, low_bits=#{low_bits}, distance=#{distance}"
    end
    if distance > 100000
      # The counter has already been advanced here, as in the original trace.
      puts " [LARGE_DISTANCE ##{$distance_decode_count}] distance=#{distance}, slot=#{slot}"
      puts " BEFORE: range_decoder.range=#{range_decoder.range}, range_decoder.code=#{range_decoder.code}"
    end
  end

  distance
end
|
|
242
|
+
|
|
243
|
+
private
|
|
244
|
+
|
|
245
|
+
# Populate @slot_fast so that @slot_fast[distance] is the distance slot for
# every distance below DIST_SLOT_FAST_LIMIT.
#
# Slots 0-3 each cover exactly one distance. From slot 4 on, a slot covers
# 2**((slot >> 1) - 1) consecutive distances:
#   slot 4 -> 4-5, slot 5 -> 6-7, slot 6 -> 8-11, ...
# Filling stops as soon as the table is full, so the last slot may be
# written only partially.
#
# @return [void]
def init_slot_fast_table
  cursor = 0

  NUM_DIST_SLOTS.times do |slot|
    break if cursor >= DIST_SLOT_FAST_LIMIT

    span = slot < 4 ? 1 : 1 << ((slot >> 1) - 1)
    stop = [cursor + span, DIST_SLOT_FAST_LIMIT].min
    @slot_fast.fill(slot, cursor...stop)
    cursor = stop
  end

  nil
end
|
|
285
|
+
|
|
286
|
+
# Map a distance to its distance slot.
#
# Distances below DIST_SLOT_FAST_LIMIT are looked up in the precomputed
# @slot_fast table. Larger distances are computed from the position of the
# highest set bit: slot = 2 * top_bit + (the bit just below the top bit).
#
# @param distance [Integer] Distance value
# @return [Integer] Distance slot (0-63)
def get_dist_slot(distance)
  return @slot_fast[distance] if distance < DIST_SLOT_FAST_LIMIT

  # Highest set bit index, capped at 31: only the low 32 bits are scanned.
  top_bit = [distance.bit_length - 1, 31].min
  (top_bit << 1) + ((distance >> (top_bit - 1)) & 1)
end
|
|
307
|
+
|
|
308
|
+
# Encode a symbol through a bit tree, most significant bit first.
#
# The walk starts at model index 1; after each bit the index becomes
# (index << 1) | bit, so every bit is coded with a model chosen by the bits
# already written.
#
# Per-bit tracing is printed only when both TRACE_ALL_SLOT_ENCODE and
# LZMA_DEBUG_ENCODE are set in the environment.
#
# @param range_encoder [RangeEncoder] The range encoder
# @param models [Array<BitModel>] Array of bit models for the tree
# @param symbol [Integer] Symbol to encode
# @param num_bits [Integer] Number of bits in the tree
# @return [void]
def encode_tree(range_encoder, models, symbol, num_bits)
  tracing = ENV["TRACE_ALL_SLOT_ENCODE"] && ENV["LZMA_DEBUG_ENCODE"]

  if tracing
    puts " [encode_tree START] RECEIVED symbol=#{symbol}, num_bits=#{num_bits}"
    puts " BEFORE: range=#{range_encoder.range}, low=#{range_encoder.low}"
  end

  node = 1
  num_bits.times do |step|
    shift = num_bits - 1 - step
    bit = (symbol >> shift) & 1

    if tracing
      puts " [#{step + 1}/#{num_bits}] i=#{shift}, bit=#{bit}, m=#{node}, model_idx=#{node}, prob=#{models[node].probability}"
    end

    range_encoder.encode_bit(models[node], bit)
    node = (node << 1) | bit
  end

  if tracing
    puts " AFTER: range=#{range_encoder.range}, low=#{range_encoder.low}"
    puts " [encode_tree END] ENCODED symbol=#{symbol}"
  end
end
|
|
341
|
+
|
|
342
|
+
# Decode a symbol from a bit tree, most significant bit first.
#
# The walk starts at model index 1; after each decoded bit the index
# becomes (index << 1) | bit.
#
# Tracing requires LZMA_DEBUG_DISTANCE to be set, plus one of: a 6-bit tree
# with TRACE_SLOT_DECODE set, $distance_decode_count == 28, or
# TRACE_ALL_SLOT_DECODE set.
#
# @param range_decoder [RangeDecoder] The range decoder
# @param models [Array<BitModel>] Array of bit models for the tree
# @param num_bits [Integer] Number of bits in the tree
# @return [Integer] Decoded symbol
def decode_tree(range_decoder, models, num_bits)
  selected = (num_bits == 6 && ENV["TRACE_SLOT_DECODE"]) ||
             $distance_decode_count == 28 ||
             ENV["TRACE_ALL_SLOT_DECODE"]
  tracing = selected && ENV["LZMA_DEBUG_DISTANCE"]

  if tracing
    puts " [decode_tree START] num_bits=#{num_bits}, range=#{range_decoder.range}, code=#{range_decoder.code}"
    puts " models array object_id=#{models.object_id}"
  end

  node = 1
  value = 0

  num_bits.times do |step|
    shift = num_bits - 1 - step
    current = models[node]
    bit = range_decoder.decode_bit(current)
    node = (node << 1) | bit
    value |= (bit << shift)
    next unless tracing

    puts " [#{step + 1}/#{num_bits}] i=#{shift}, bit=#{bit}, m=#{node}, model.object_id=#{current.object_id}, prob=#{current.probability}, symbol=#{value}"
  end

  puts " [decode_tree END] symbol=#{value}" if tracing
  value
end
|
|
376
|
+
|
|
377
|
+
# Encode a symbol through a reverse bit tree, least significant bit first.
#
# The walk starts at node 1 and moves to (node << 1) | bit after each bit;
# the model used for a bit is models[base_idx + node].
#
# @param range_encoder [RangeEncoder] The range encoder
# @param models [Array<BitModel>] Array of bit models
# @param base_idx [Integer] Base index into models array
# @param symbol [Integer] Symbol to encode
# @param num_bits [Integer] Number of bits
# @return [void]
def encode_reverse_tree(range_encoder, models, base_idx, symbol,
                        num_bits)
  node = 1
  pending = symbol

  num_bits.times do
    bit = pending & 1
    pending >>= 1
    range_encoder.encode_bit(models[base_idx + node], bit)
    node = (node << 1) | bit
  end
end
|
|
394
|
+
|
|
395
|
+
# Decode a symbol from a reverse bit tree, least significant bit first.
#
# The walk starts at node 1 and moves to (node << 1) | bit after each bit;
# the model used for a bit is models[base_idx + node]. The bit decoded in
# round k lands at bit position k of the result.
#
# @param range_decoder [RangeDecoder] The range decoder
# @param models [Array<BitModel>] Array of bit models
# @param base_idx [Integer] Base index into models array
# @param num_bits [Integer] Number of bits
# @return [Integer] Decoded symbol
def decode_reverse_tree(range_decoder, models, base_idx, num_bits)
  node = 1

  (0...num_bits).reduce(0) do |value, position|
    bit = range_decoder.decode_bit(models[base_idx + node])
    node = (node << 1) | bit
    value | (bit << position)
  end
end
|
|
412
|
+
end
|
|
413
|
+
end
|
|
414
|
+
end
|
|
415
|
+
end
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "constants"
|
|
24
|
+
require_relative "xz_encoder"
|
|
25
|
+
require_relative "../../implementations/seven_zip/lzma/encoder"
|
|
26
|
+
|
|
27
|
+
module Omnizip
|
|
28
|
+
module Algorithms
|
|
29
|
+
class LZMA < Algorithm
|
|
30
|
+
# LZMA Encoder - Factory for LZMA compression implementations
#
# Front end that validates the LZMA parameters and then hands all encoding
# work to one of two backends:
#
# 1. SDK-compatible (default): Implementations::SevenZip::LZMA::Encoder,
#    intended for 7-Zip containers
# 2. XZ-compatible (+xz_compatible: true+): XzEncoderAdapter, intended for
#    XZ/LZMA files
#
# The encoder produces a stream that consists of:
# - Property byte (lc, lp, pb parameters)
# - Dictionary size (4 bytes)
# - Uncompressed size (8 bytes)
# - Compressed data
class Encoder
  include Constants

  attr_reader :dict_size, :lc, :lp, :pb

  # Initialize the encoder
  #
  # Defaults are applied here only for validation and for the readers; the
  # backend receives the caller's +options+ hash unchanged.
  #
  # @param output [IO] Output stream for compressed data
  # @param options [Hash] Encoding options
  # @option options [Integer] :dict_size Dictionary size (default: 64KB)
  # @option options [Integer] :lc Literal context bits (0-8)
  # @option options [Integer] :lp Literal position bits (0-4)
  # @option options [Integer] :pb Position bits (0-4)
  # @option options [Boolean] :write_size Write actual size (false for standalone .lzma)
  # @option options [Boolean] :sdk_compatible Use SDK-compatible encoding (default: true)
  # @option options [Boolean] :xz_compatible Use XZ-compatible encoding (default: false)
  # @option options [Boolean] :raw_mode Skip header for raw LZMA encoding (for 7-Zip/LZMA2)
  # @raise [ArgumentError] If lc, lp, pb or dict_size is out of range
  def initialize(output, options = {})
    @output = output
    @dict_size = options[:dict_size] || (1 << 16)
    @lc = options[:lc] || 3
    @lp = options[:lp] || 0
    @pb = options[:pb] || 2
    @write_size = options.fetch(:write_size, true)
    @xz_compatible = options.fetch(:xz_compatible, false)
    # SDK mode defaults to "on" exactly when XZ mode is off.
    @sdk_compatible = options.fetch(:sdk_compatible, !@xz_compatible)
    @raw_mode = options.fetch(:raw_mode, false)

    validate_parameters

    # Only the selected backend constant is resolved.
    backend = if @xz_compatible
                XzEncoderAdapter
              else
                Implementations::SevenZip::LZMA::Encoder
              end
    @impl = backend.new(output, options)
  end

  # Encode a stream of data by delegating to the selected backend
  #
  # @param input [String, IO] Input data to compress
  # @return [Array<String, Integer>, void] Tuple of [data, decode_bytes] in raw mode, void otherwise
  def encode_stream(input)
    @impl.encode_stream(input)
  end

  private

  # Validate encoding parameters
  #
  # Checks run in the order lc, lp, pb, dict_size; the first failure raises.
  #
  # @return [void]
  # @raise [ArgumentError] If parameters are invalid
  def validate_parameters
    [
      [@lc, 8, "lc must be 0-8"],
      [@lp, 4, "lp must be 0-4"],
      [@pb, 4, "pb must be 0-4"],
    ].each do |value, upper, message|
      raise ArgumentError, message unless value.between?(0, upper)
    end

    raise ArgumentError, "Invalid dictionary size" unless @dict_size.between?(DICT_SIZE_MIN, DICT_SIZE_MAX)
  end
end
|
|
106
|
+
|
|
107
|
+
# Adapter for XzEncoder to match SdkEncoder interface
#
# XzEncoder has a different interface (encode(input, output) vs encode_stream(input)).
# This adapter wraps XzEncoder to provide the same interface as SdkEncoder.
#
# The compressed bytes are always written to +output+ by XzEncoder. The
# adapter additionally returns them as a String when +output+ exposes a
# +string+ method (e.g. StringIO); for other outputs such as File the data
# element is nil instead of raising NoMethodError after the write.
class XzEncoderAdapter
  # Initialize adapter
  #
  # @param output [IO] Output stream
  # @param options [Hash] Encoding options, passed through to XzEncoder
  def initialize(output, options = {})
    @output = output
    @options = options
    @xz_encoder = XzEncoder.new(options)
    @bytes_for_decode = nil
  end

  # Encode stream (matches SdkEncoder interface)
  #
  # Non-String input is read in full via +read+ before encoding.
  #
  # @param input [String, IO] Input data to compress
  # @return [Array<String, Integer>] Tuple of [compressed_data, decode_bytes];
  #   compressed_data is nil when the output has no +string+ method
  def encode_stream(input)
    input_data = input.is_a?(String) ? input : input.read
    @bytes_for_decode = @xz_encoder.encode(input_data, @output)
    [output_string, @bytes_for_decode]
  end

  # Get bytes for decode (for LZMA2 compatibility)
  #
  # Uses the value reported by the last encode call, falling back to the
  # current size of the output buffer when that is available.
  #
  # @return [Integer, nil] Number of bytes decoder will consume, or nil when
  #   nothing has been encoded yet and the output has no +string+ method
  def bytes_for_decode
    @bytes_for_decode || output_string&.bytesize
  end

  private

  # Buffer contents of the output, if the output keeps one in memory.
  #
  # @return [String, nil]
  def output_string
    @output.string if @output.respond_to?(:string)
  end
end
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
end
|