omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "constants"
|
|
4
|
+
|
|
5
|
+
module Omnizip
|
|
6
|
+
module Algorithms
|
|
7
|
+
class Deflate64
|
|
8
|
+
# LZ77 encoder with 64KB sliding window for Deflate64
|
|
9
|
+
class LZ77Encoder
|
|
10
|
+
include Constants
|
|
11
|
+
|
|
12
|
+
attr_reader :window_size
|
|
13
|
+
|
|
14
|
+
# Set up an encoder with empty match-finding state.
#
# @param window_size [Integer] Maximum number of bytes kept in the
#   window buffer (defaults to DICTIONARY_SIZE)
def initialize(window_size = DICTIONARY_SIZE)
  @position = 0       # last position handed to update_window
  @hash_table = {}    # hash value => list of positions that produced it
  @window = []        # most recently recorded bytes
  @window_size = window_size
end
|
|
20
|
+
|
|
21
|
+
# Tokenize data into literals and back-references.
#
# Walks the input front to back. At each position the longest earlier
# match is looked up; a match of at least MIN_MATCH_LENGTH bytes becomes
# a :match token and the cursor skips past it, otherwise the current
# byte becomes a :literal token.
#
# @param data [String] Input data to compress
# @return [Array<Hash>] Tokens, each either
#   { type: :literal, value: Integer } or
#   { type: :match, length: Integer, distance: Integer }
def find_matches(data)
  # The hash chains store byte offsets into +data+. Entries left over
  # from a previous call would refer to a different buffer and could sit
  # at or ahead of the current cursor, so start every call with a clean
  # table.
  @hash_table = {}

  tokens = []
  pos = 0
  total = data.bytesize

  while pos < total
    match = find_longest_match(pos, data)

    if match && match[:length] >= MIN_MATCH_LENGTH
      tokens << {
        type: :match,
        length: match[:length],
        distance: match[:distance],
      }
      pos += match[:length]
    else
      tokens << {
        type: :literal,
        value: data.getbyte(pos),
      }
      pos += 1
    end

    update_window(data, pos)
  end

  tokens
end
|
|
52
|
+
|
|
53
|
+
private
|
|
54
|
+
|
|
55
|
+
# Find the longest earlier occurrence of the bytes starting at pos.
#
# Looks up the hash chain for the bytes at pos, examines at most
# MAX_CHAIN_LENGTH of the most recently recorded positions (newest
# first), and afterwards records pos in that chain.
#
# @param pos [Integer] Current position in data
# @param data [String] Input data
# @return [Hash, nil] { length:, distance: } of the best match found, or
#   nil when no candidate reaches MIN_MATCH_LENGTH or fewer than
#   MIN_MATCH_LENGTH bytes remain
def find_longest_match(pos, data)
  return nil if pos + MIN_MATCH_LENGTH > data.bytesize

  best_match = nil
  best_length = MIN_MATCH_LENGTH - 1

  hash = calculate_hash(data, pos)
  chain = (@hash_table[hash] ||= [])

  # last(n) copies only the tail we are going to inspect instead of
  # reversing the whole (unbounded) chain on every probe.
  chain.last(MAX_CHAIN_LENGTH).reverse_each do |candidate_pos|
    distance = pos - candidate_pos

    # A back-reference must point strictly backwards; an entry at or
    # after pos would yield a zero or negative distance.
    next unless distance.positive?
    break if distance > MAX_DISTANCE

    length = match_length(data, pos, candidate_pos)
    next unless length > best_length

    best_length = length
    best_match = {
      length: length,
      distance: distance,
    }

    # Good enough: stop searching once the match is "nice".
    break if length >= NICE_MATCH
  end

  # Record the current position for later lookups.
  chain << pos

  best_match
end
|
|
95
|
+
|
|
96
|
+
# Count how many consecutive bytes agree starting at two positions.
#
# The count is capped at MAX_MATCH_LENGTH and at the number of bytes
# remaining from pos1 to the end of data.
#
# @param data [String] Input data
# @param pos1 [Integer] First position
# @param pos2 [Integer] Second position
# @return [Integer] Match length
def match_length(data, pos1, pos2)
  remaining = data.bytesize - pos1
  limit = remaining < MAX_MATCH_LENGTH ? remaining : MAX_MATCH_LENGTH

  # Offset of the first disagreeing byte, if there is one within the limit.
  mismatch = (0...limit).find do |offset|
    data.getbyte(pos1 + offset) != data.getbyte(pos2 + offset)
  end

  mismatch || [limit, 0].max
end
|
|
113
|
+
|
|
114
|
+
# Hash the MIN_MATCH_LENGTH bytes that start at pos.
#
# Each byte is folded in by shifting the running value left by
# HASH_SHIFT, XOR-ing the byte, and masking with HASH_SIZE - 1.
#
# @param data [String] Input data
# @param pos [Integer] Position to hash
# @return [Integer] Hash value; 0 when fewer than MIN_MATCH_LENGTH bytes
#   remain at pos
def calculate_hash(data, pos)
  stop = pos + MIN_MATCH_LENGTH
  return 0 unless stop <= data.bytesize

  mask = HASH_SIZE - 1
  (pos...stop).reduce(0) do |acc, index|
    ((acc << HASH_SHIFT) ^ data.getbyte(index)) & mask
  end
end
|
|
129
|
+
|
|
130
|
+
# Record progress in the window buffer.
#
# Appends the single byte immediately before pos (when pos is positive),
# discards the oldest buffered byte if the buffer has grown past
# @window_size, and remembers pos as the current position. Only one byte
# is recorded per call, regardless of how far pos advanced since the
# previous call.
#
# @param data [String] Input data
# @param pos [Integer] Current position
def update_window(data, pos)
  @window.push(data.getbyte(pos - 1)) unless pos <= 0
  @window.shift unless @window.size <= @window_size
  @position = pos
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../algorithm"
|
|
4
|
+
require "zlib"
|
|
5
|
+
|
|
6
|
+
module Omnizip
|
|
7
|
+
module Algorithms
|
|
8
|
+
# Deflate64 (Enhanced Deflate) compression algorithm
|
|
9
|
+
#
|
|
10
|
+
# Extends standard Deflate with:
|
|
11
|
+
# - 64KB sliding window (vs 32KB)
|
|
12
|
+
# - Better compression for large files
|
|
13
|
+
# - ZIP compression method 9
|
|
14
|
+
#
|
|
15
|
+
# NOTE: This is a simplified implementation that uses standard
|
|
16
|
+
# Deflate internally, as true Deflate64 requires complex
|
|
17
|
+
# bit-level manipulation that is better handled by libraries
|
|
18
|
+
# specifically designed for it.
|
|
19
|
+
class Deflate64 < Algorithm
  # Window size advertised for this algorithm (64KB).
  DICTIONARY_SIZE = 65_536

  # Algorithm metadata
  #
  # @return [Hash] Name, type, streaming flag, dictionary size, ZIP
  #   compression method and a short description
  def self.metadata
    {
      name: "Deflate64",
      type: :compression,
      streaming_supported: true,
      dictionary_size: DICTIONARY_SIZE,
      compression_method: 9,
      description: "Enhanced Deflate with 64KB window",
    }
  end

  # Compress input stream to output stream
  #
  # The whole input is read into memory and compressed in one call.
  # Nothing is written when the input is empty.
  #
  # NOTE: the data is produced by Zlib::Deflate with Zlib::MAX_WBITS,
  # i.e. a standard zlib stream; zlib's largest window is 32KB.
  #
  # @param input [IO] Input stream
  # @param output [IO] Output stream
  # @param options [Hash] Compression options
  # @option options [Integer] :level Compression level (1-9)
  # @return [Integer, nil] Result of output.write, or nil for empty input
  def compress(input, output, options = {})
    level = options[:level] || Zlib::DEFAULT_COMPRESSION

    data = input.read
    return if data.nil? || data.empty?

    deflater = Zlib::Deflate.new(
      level,
      Zlib::MAX_WBITS, # Maximum window size zlib supports
      Zlib::MAX_MEM_LEVEL,
    )

    begin
      output.write(deflater.deflate(data, Zlib::FINISH))
    ensure
      # Release the zlib stream even when deflate or write raises.
      deflater.close
    end
  end

  # Decompress input stream to output stream
  #
  # The whole input is read into memory and inflated in one call.
  # Nothing is written when the input is empty.
  #
  # @param input [IO] Input stream
  # @param output [IO] Output stream
  # @param _options [Hash] Decompression options (unused)
  # @return [Integer, nil] Result of output.write, or nil for empty input
  def decompress(input, output, _options = {})
    compressed = input.read
    return if compressed.nil? || compressed.empty?

    # Put the output into binary mode where it supports it (e.g. StringIO)
    output.set_encoding(Encoding::BINARY) if output.respond_to?(:set_encoding)
    output.binmode if output.respond_to?(:binmode)

    inflater = Zlib::Inflate.new(Zlib::MAX_WBITS)

    begin
      decompressed = inflater.inflate(compressed)
    ensure
      # Release the zlib stream even when the data is corrupt.
      inflater.close
    end

    # Force binary encoding to match original data
    output.write(decompressed.force_encoding(Encoding::BINARY))
  end

  # Check if streaming is supported
  #
  # @return [Boolean] Always true for Deflate64
  def self.streaming_supported?
    true
  end

  # Get dictionary size
  #
  # @return [Integer] 64KB
  def self.dictionary_size
    DICTIONARY_SIZE
  end

  # Get compression method ID for ZIP format
  #
  # @return [Integer] Method 9
  def self.compression_method
    9
  end
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Register algorithm
|
|
109
|
+
Omnizip::AlgorithmRegistry.register(:deflate64, Omnizip::Algorithms::Deflate64)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Algorithms
|
|
25
|
+
class LZMA < Algorithm
|
|
26
|
+
# Adaptive probability model for range coding
|
|
27
|
+
#
|
|
28
|
+
# This class manages probability states for individual bits in the
|
|
29
|
+
# range coder. It uses adaptive arithmetic coding where probabilities
|
|
30
|
+
# are updated based on actual bit values encountered during encoding
|
|
31
|
+
# or decoding.
|
|
32
|
+
#
|
|
33
|
+
# Ported from XZ Utils range_encoder.c probability model implementation.
|
|
34
|
+
class BitModel
  MOVE_BITS = 5 # Shift applied to every probability adjustment
  BIT_MODEL_TOTAL = 0x800 # XZ Utils RC_BIT_MODEL_TOTAL = 2048
  MAX_PROB = 1 << 11 # 2048
  PROB_INIT = 1024 # Half of BIT_MODEL_TOTAL, i.e. probability 0.5

  attr_reader :probability

  # Build a model that starts at the given probability.
  #
  # @param initial_prob [Integer] Starting probability (default: PROB_INIT)
  def initialize(initial_prob = PROB_INIT)
    @probability = initial_prob
  end

  # Adapt the model to a bit that was just coded.
  #
  # A 0 bit moves the probability up by
  # (BIT_MODEL_TOTAL - probability) >> MOVE_BITS; any other bit moves it
  # down by probability >> MOVE_BITS.
  #
  # @param bit [Integer] The bit value that was coded (0 or 1)
  # @return [Integer] The probability after the update
  def update(bit)
    adjustment =
      if bit.zero?
        (BIT_MODEL_TOTAL - @probability) >> MOVE_BITS
      else
        -(@probability >> MOVE_BITS)
      end

    @probability += adjustment
  end

  # @deprecated Use {update} instead (identical behaviour)
  def update!(bit)
    update(bit)
  end

  # Put the model back to PROB_INIT.
  #
  # Note that this ignores any custom value passed to the constructor.
  #
  # @return [Integer] PROB_INIT
  def reset
    @probability = PROB_INIT
  end

  # Probability assigned to a 0 bit.
  #
  # @return [Integer] The current probability value
  def prob_0
    @probability
  end

  # Probability assigned to a 1 bit.
  #
  # @return [Integer] MAX_PROB minus the current probability value
  def prob_1
    MAX_PROB - @probability
  end

  # Copy this model.
  #
  # @return [BitModel] A new BitModel carrying the same probability
  def dup
    BitModel.new(@probability)
  end

  # Probability in the form handed to the range coder.
  #
  # @return [Integer] The current probability value, unchanged
  def to_range
    @probability
  end
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Algorithms
|
|
25
|
+
class LZMA
|
|
26
|
+
# LZMA algorithm constants
|
|
27
|
+
#
|
|
28
|
+
# This module contains all constants used by the LZMA algorithm,
|
|
29
|
+
# including range coding parameters, probability models, and
|
|
30
|
+
# compression limits.
|
|
31
|
+
module Constants
  # --- Range coder ---

  # Threshold for range normalization (2^24)
  TOP = 1 << 24

  # Total probability range for bit models (2^11)
  BIT_MODEL_TOTAL = 1 << 11

  # Number of bits to shift for probability updates
  MOVE_BITS = 5

  # Initial probability value: half of BIT_MODEL_TOTAL (0.5 probability)
  INIT_PROBS = BIT_MODEL_TOTAL / 2

  # Number of bits used in direct bit encoding
  NUM_DIRECT_BITS = 8

  # --- LZMA state ---

  # Maximum number of literal position bits (lp)
  NUM_LIT_POS_BITS_MAX = 4

  # Maximum number of literal context bits (lc)
  NUM_LIT_CONTEXT_BITS_MAX = 8

  # Maximum number of position bits (pb)
  NUM_POS_BITS_MAX = 4

  # Number of LZMA states (from state machine)
  NUM_STATES = 12

  # --- Dictionary size limits ---

  DICT_SIZE_MIN = 4 * 1024 # 4KB
  DICT_SIZE_MAX = 1024**3 # 1GB

  # --- Match lengths ---

  MATCH_LEN_MIN = 2
  MATCH_LEN_MAX = 273

  # Number of distance slots
  NUM_DIST_SLOTS = 64

  # Number of position states: 2^NUM_POS_BITS_MAX
  POS_STATES_MAX = 2**NUM_POS_BITS_MAX

  # Literal coder size: 2^(NUM_LIT_POS_BITS_MAX + NUM_LIT_CONTEXT_BITS_MAX)
  LIT_SIZE_MAX = 2**(NUM_LIT_POS_BITS_MAX + NUM_LIT_CONTEXT_BITS_MAX)

  # Number of length to position states
  NUM_LEN_TO_POS_STATES = 4

  # --- Compression levels ---

  COMPRESSION_LEVEL_MIN = 0
  COMPRESSION_LEVEL_MAX = 9
  COMPRESSION_LEVEL_DEFAULT = 5

  # End of stream marker
  EOS_MARKER = true

  # --- SDK-specific length encoding ---

  NUM_LEN_LOW_BITS = 3
  NUM_LEN_MID_BITS = 3
  NUM_LEN_HIGH_BITS = 8
  LEN_LOW_SYMBOLS = 2**NUM_LEN_LOW_BITS
  LEN_MID_SYMBOLS = 2**NUM_LEN_MID_BITS
  LEN_HIGH_SYMBOLS = 2**NUM_LEN_HIGH_BITS

  # --- SDK-specific distance encoding ---

  NUM_DIST_SLOT_BITS = 6
  DIST_ALIGN_BITS = 4
  DIST_ALIGN_SIZE = 2**DIST_ALIGN_BITS
  START_POS_MODEL_INDEX = 4
  END_POS_MODEL_INDEX = 14
  NUM_FULL_DISTANCES = 2**(END_POS_MODEL_INDEX / 2)

  # Distance slot calculation helper: 2^(NUM_DIST_SLOT_BITS + 1)
  DIST_SLOT_FAST_LIMIT = 2**(NUM_DIST_SLOT_BITS + 1)
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "xz_utils_decoder"
|
|
24
|
+
|
|
25
|
+
module Omnizip
|
|
26
|
+
module Algorithms
|
|
27
|
+
class LZMA < Algorithm
|
|
28
|
+
# LZMA Decoder - Factory for LZMA decompression implementations
|
|
29
|
+
#
|
|
30
|
+
# This class provides a unified interface for LZMA decoding, delegating
|
|
31
|
+
# to the XZ Utils implementation for full compatibility.
|
|
32
|
+
#
|
|
33
|
+
# The decoder reads a stream that consists of:
|
|
34
|
+
# - Property byte (lc, lp, pb parameters)
|
|
35
|
+
# - Dictionary size (4 bytes)
|
|
36
|
+
# - Uncompressed size (8 bytes)
|
|
37
|
+
# - Compressed data
|
|
38
|
+
class Decoder
  attr_reader :dict_size, :lc, :lp, :pb, :uncompressed_size

  # Initialize the decoder
  #
  # Builds the underlying XzUtilsDecoder and caches the header fields it
  # exposes so they are available through this object's readers.
  #
  # @param input [IO] Input stream of compressed data
  # @param options [Hash] Decoding options, passed through unchanged
  # @option options [Boolean] :raw_mode Skip header parsing for raw LZMA (for LZMA2)
  # @option options [Integer] :dict_size Dictionary size for raw mode
  def initialize(input, options = {})
    # Use XZ Utils LZMA decoder (full XZ Utils compatibility)
    @impl = XzUtilsDecoder.new(input, options)

    # Expose header info for backward compatibility
    @lc = @impl.lc
    @lp = @impl.lp
    @pb = @impl.pb
    @dict_size = @impl.dict_size
    @uncompressed_size = @impl.uncompressed_size
  end

  # Decode a compressed stream
  #
  # @param output [IO, nil] Optional output stream (if nil, returns String)
  # @param preserve_dict [Boolean] Whether to preserve dictionary from previous decode
  # @return [String, Integer] Decompressed data or bytes written
  def decode_stream(output = nil, preserve_dict: false)
    @impl.decode_stream(output, preserve_dict: preserve_dict)
  end

  # Reset the decoder state for reuse with new properties
  #
  # This method is used by LZMA2 decoder for multi-chunk streams.
  #
  # @param new_lc [Integer, nil] New lc value (if nil, keeps current)
  # @param new_lp [Integer, nil] New lp value (if nil, keeps current)
  # @param new_pb [Integer, nil] New pb value (if nil, keeps current)
  # @param preserve_dict [Boolean] If true, preserve dictionary state (pos, dict_full)
  # @return [void]
  def reset(new_lc: nil, new_lp: nil, new_pb: nil, preserve_dict: false)
    @impl.reset(new_lc: new_lc, new_lp: new_lp, new_pb: new_pb,
                preserve_dict: preserve_dict)

    # Re-read the cached properties so the readers match the implementation
    @lc = @impl.lc
    @lp = @impl.lp
    @pb = @impl.pb
  end

  # Reset only state machine and rep distances, preserve probability models
  #
  # This method is used by LZMA2 decoder for multi-chunk streams.
  #
  # @return [void]
  def reset_state_only
    @impl.reset_state_only
  end

  # Prepare state reset - called BEFORE setting new input
  #
  # This method is used by LZMA2 decoder for multi-chunk streams.
  #
  # @return [void]
  def prepare_state_reset
    @impl.prepare_state_reset
  end

  # Reset state machine only - preserves rep distances
  #
  # This method is used by LZMA2 decoder for multi-chunk streams
  # where we want to reset the state machine but preserve rep distances
  # from the previous chunk (control >= 0xA0 but < 0xC0).
  #
  # @return [void]
  def reset_state_machine_only
    @impl.reset_state_machine_only
  end

  # Finish state reset - called AFTER setting new input
  #
  # This method is used by LZMA2 decoder for multi-chunk streams.
  #
  # @return [void]
  def finish_state_reset
    @impl.finish_state_reset
  end

  # Set new input stream for chunked decoding
  #
  # This method is used by LZMA2 decoder for multi-chunk streams.
  #
  # @param new_input [IO] New input stream
  # @return [void]
  def set_input(new_input)
    @impl.set_input(new_input)
  end

  # Set uncompressed size for chunked decoding
  #
  # This method is used by LZMA2 decoder for multi-chunk streams.
  #
  # @param size [Integer] Uncompressed size
  # @param allow_eopm [Boolean] Whether to allow end-of-stream marker
  # @return [void]
  def set_uncompressed_size(size, allow_eopm: true)
    result = @impl.set_uncompressed_size(size, allow_eopm: allow_eopm)

    # Refresh the cached value, as reset does for lc/lp/pb; otherwise the
    # uncompressed_size reader keeps reporting the value seen at construction.
    @uncompressed_size = @impl.uncompressed_size

    result
  end
end
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Algorithms
|
|
5
|
+
class LZMA < Algorithm
|
|
6
|
+
# Circular buffer dictionary for LZMA sliding window
|
|
7
|
+
# Ported from XZ Utils lzma_decoder.c
|
|
8
|
+
class Dictionary
  attr_reader :size, :position, :buffer

  # @param size [Integer] Maximum number of bytes kept in the window
  def initialize(size)
    @size = size
    @buffer = String.new(encoding: Encoding::BINARY)
    @position = 0
  end

  # Append bytes to dictionary
  #
  # Only the most recent `size` bytes are kept; `position` counts every
  # byte ever appended.
  #
  # @param data [String, #each_byte] Bytes to append
  # @return [Object] The data argument
  def append(data)
    bytes = data.respond_to?(:b) ? data.b : data.each_byte.to_a.pack("C*")

    @buffer << bytes
    @position += bytes.bytesize

    # Trim once per call rather than once per byte: same resulting window,
    # without re-slicing the whole buffer for every appended byte.
    excess = @buffer.bytesize - @size
    @buffer = @buffer.byteslice(excess, @size) if excess.positive?

    data
  end

  # Read bytes from dictionary at a distance back
  #
  # Distance 1 is the most recently appended byte. When length exceeds
  # distance the source overlaps the bytes being produced, so the last
  # `distance` bytes are repeated until `length` bytes are available.
  #
  # @param distance [Integer] How far back to start (1..buffer size)
  # @param length [Integer] Number of bytes to return
  # @return [String] Binary string of `length` bytes (empty if length <= 0)
  # @raise [RuntimeError] If distance is outside the buffered data
  def read_bytes(distance, length)
    raise "Invalid distance: #{distance}" if distance > @buffer.bytesize
    return String.new(encoding: Encoding::BINARY) unless length.positive?
    raise "Invalid distance: #{distance}" if distance < 1

    pattern = @buffer.byteslice(@buffer.bytesize - distance, distance)
    repeats = (length + distance - 1) / distance
    (pattern * repeats).byteslice(0, length)
  end

  # Get byte at distance back
  #
  # @param distance [Integer] How far back to look (1 = last byte)
  # @return [Integer] Byte value (0..255)
  # @raise [RuntimeError] If distance is outside the buffered data
  def get_byte(distance)
    if distance < 1 || distance > @buffer.bytesize
      raise "Invalid distance: #{distance}"
    end

    @buffer.getbyte(@buffer.bytesize - distance)
  end

  # Reset dictionary
  #
  # @return [Integer] 0
  def reset!
    @buffer.clear
    @position = 0
  end

  # Clone dictionary
  #
  # @return [Dictionary] Copy with its own buffer and the same position
  def clone
    dict = Dictionary.new(@size)
    dict.instance_variable_set(:@buffer, @buffer.dup)
    dict.instance_variable_set(:@position, @position)
    dict
  end
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|