omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../../../../algorithms/ppmd7/context"
|
|
24
|
+
|
|
25
|
+
module Omnizip
|
|
26
|
+
module Formats
|
|
27
|
+
module Rar
|
|
28
|
+
module Compression
|
|
29
|
+
module PPMd
|
|
30
|
+
# RAR variant H context node in PPMd model
|
|
31
|
+
#
|
|
32
|
+
# Adapts PPMd7 Context for RAR-specific requirements:
|
|
33
|
+
# - Different memory allocation strategy
|
|
34
|
+
# - RAR-specific escape frequency initialization
|
|
35
|
+
# - Modified probability update rules
|
|
36
|
+
#
|
|
37
|
+
# Responsibilities:
|
|
38
|
+
# - ONE responsibility: Manage RAR PPMd variant H context
|
|
39
|
+
# - Track symbol statistics for RAR compression
|
|
40
|
+
# - Maintain context tree structure
|
|
41
|
+
# - Handle RAR-specific probability updates
|
|
42
|
+
class Context < Omnizip::Algorithms::PPMd7::Context
|
|
43
|
+
# RAR variant H escape frequency constant
|
|
44
|
+
# RAR uses different initial escape frequency than PPMd7
|
|
45
|
+
RAR_INIT_ESCAPE_FREQ = 1
|
|
46
|
+
|
|
47
|
+
# Initialize a new RAR variant H context
|
|
48
|
+
#
|
|
49
|
+
# @param order [Integer] The order of this context (depth in tree)
|
|
50
|
+
# @param suffix [Context, nil] Parent context (shorter context)
|
|
51
|
+
def initialize(order, suffix = nil)
  # Forward both arguments unchanged to the parent Context initializer.
  super(order, suffix)

  # Assigned after the parent initializer has run, so this is the value
  # @escape_freq holds once construction finishes.
  @escape_freq = RAR_INIT_ESCAPE_FREQ
end
|
|
56
|
+
|
|
57
|
+
# Update symbol frequency after encoding/decoding (RAR variant)
|
|
58
|
+
#
|
|
59
|
+
# RAR variant H uses a slightly different update strategy
|
|
60
|
+
# compared to standard PPMd7.
|
|
61
|
+
#
|
|
62
|
+
# @param symbol [Integer] The symbol to update
|
|
63
|
+
# @param increment [Integer] Amount to increase frequency
|
|
64
|
+
# @return [void]
|
|
65
|
+
def update_symbol(symbol, increment = 1)
  # Symbols that have no state entry in this context are left alone.
  entry = @states[symbol]
  return unless entry

  # Keep the per-symbol count and the running total in step.
  entry.freq += increment
  @sum_freq += increment

  # Rescale only once the running total has gone strictly above the limit
  # reported by rar_max_freq.
  over_limit = @sum_freq > rar_max_freq
  rescale_frequencies if over_limit
end
|
|
76
|
+
|
|
77
|
+
private
|
|
78
|
+
|
|
79
|
+
# RAR variant H maximum frequency threshold
|
|
80
|
+
#
|
|
81
|
+
# @return [Integer] Maximum frequency before rescaling
|
|
82
|
+
def rar_max_freq
  # Upper bound that update_symbol compares @sum_freq against; a total
  # strictly greater than this triggers rescale_frequencies.
  124
end
|
|
86
|
+
|
|
87
|
+
# Rescale frequencies when they grow too large (RAR variant)
|
|
88
|
+
#
|
|
89
|
+
# RAR variant H uses same rescaling strategy as PPMd7
|
|
90
|
+
# but this method is here for future RAR-specific modifications.
|
|
91
|
+
#
|
|
92
|
+
# @return [void]
|
|
93
|
+
def rescale_frequencies
  # Rebuild the running total from scratch while halving each entry.
  @sum_freq = 0
  @states.each_value do |entry|
    # Halve with rounding up, and never let a frequency fall below 1.
    halved = (entry.freq + 1) / 2
    entry.freq = halved < 1 ? 1 : halved
    @sum_freq += entry.freq
  end
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../../../../algorithms/ppmd7/decoder"
|
|
24
|
+
require_relative "../../../../algorithms/ppmd7/model"
|
|
25
|
+
require_relative "context"
|
|
26
|
+
|
|
27
|
+
module Omnizip
|
|
28
|
+
module Formats
|
|
29
|
+
module Rar
|
|
30
|
+
module Compression
|
|
31
|
+
module PPMd
|
|
32
|
+
# RAR PPMd variant H decoder
|
|
33
|
+
#
|
|
34
|
+
# Implements decoding for RAR's PPMd variant H compression method.
|
|
35
|
+
# This adapts the standard PPMd7 algorithm for RAR-specific
|
|
36
|
+
# requirements:
|
|
37
|
+
#
|
|
38
|
+
# - Different memory model initialization
|
|
39
|
+
# - RAR-specific escape code handling
|
|
40
|
+
# - Modified context order selection
|
|
41
|
+
# - Different binary symbol encoding
|
|
42
|
+
#
|
|
43
|
+
# Responsibilities:
|
|
44
|
+
# - ONE responsibility: Decode RAR PPMd variant H compressed data
|
|
45
|
+
# - Manage decoder state and context
|
|
46
|
+
# - Transform compressed bits to original bytes
|
|
47
|
+
# - Maintain synchronized model state
|
|
48
|
+
class Decoder < Omnizip::Algorithms::PPMd7::Decoder
|
|
49
|
+
# RAR variant H specific constants
|
|
50
|
+
RAR_MAX_ORDER = 16
|
|
51
|
+
RAR_MIN_ORDER = 2
|
|
52
|
+
RAR_DEFAULT_ORDER = 6
|
|
53
|
+
|
|
54
|
+
# RAR memory size multiplier (MB to bytes)
|
|
55
|
+
RAR_MEM_MULTIPLIER = 1024 * 1024
|
|
56
|
+
|
|
57
|
+
# Initialize the RAR PPMd decoder
|
|
58
|
+
#
|
|
59
|
+
# @param input [IO] Input stream of compressed data
|
|
60
|
+
# @param options [Hash] Decoding options
|
|
61
|
+
# @option options [Integer] :model_order Maximum context order
|
|
62
|
+
# @option options [Integer] :mem_size Memory size in MB for RAR
|
|
63
|
+
def initialize(input, options = {})
  # The parent constructor is not invoked; all state is assigned here.
  @input = input
  @options = options

  # :mem_size is expressed in megabytes (16 when absent); the model is
  # given the size in bytes.
  memory_bytes = (options[:mem_size] || 16) * RAR_MEM_MULTIPLIER
  order = options[:model_order] || RAR_DEFAULT_ORDER
  @model = initialize_rar_model(order, memory_bytes)

  # Bits are pulled from the input through the LZMA range decoder.
  @range_decoder = Omnizip::Algorithms::LZMA::RangeDecoder.new(input)
end
|
|
80
|
+
|
|
81
|
+
# Decode a stream back to original bytes
|
|
82
|
+
#
|
|
83
|
+
# RAR variant H decoding process:
|
|
84
|
+
# 1. Read compressed bits using range decoder
|
|
85
|
+
# 2. Use model to find corresponding symbol
|
|
86
|
+
# 3. Update model to stay synchronized
|
|
87
|
+
# 4. Handle RAR-specific escape codes
|
|
88
|
+
#
|
|
89
|
+
# @param max_bytes [Integer, nil] Maximum bytes to decode
|
|
90
|
+
# @return [String] Decoded data
|
|
91
|
+
def decode_stream(max_bytes = nil)
  decoded = String.new(encoding: Encoding::BINARY)

  # NOTE(review): without max_bytes at most 1000 symbols are decoded, so
  # longer streams are silently truncated; there is no real end-of-stream
  # detection yet.
  (max_bytes || 1000).times do
    byte = begin
      decode_symbol
    rescue EOFError, Omnizip::DecompressionError
      # Running out of input (or a decode failure) ends the stream quietly.
      nil
    end
    break if byte.nil?

    decoded << byte.chr
  end

  decoded
end
|
|
110
|
+
|
|
111
|
+
private
|
|
112
|
+
|
|
113
|
+
# Initialize RAR variant H PPMd model
|
|
114
|
+
#
|
|
115
|
+
# RAR uses slightly different initialization than PPMd7:
|
|
116
|
+
# - Different context creation strategy
|
|
117
|
+
# - RAR-specific memory allocation
|
|
118
|
+
# - Modified root context initialization
|
|
119
|
+
#
|
|
120
|
+
# @param max_order [Integer] Maximum context order
|
|
121
|
+
# @param memory_size [Integer] Memory size in bytes
|
|
122
|
+
# @return [Omnizip::Algorithms::PPMd7::Model] Initialized model
|
|
123
|
+
def initialize_rar_model(max_order, memory_size)
  # Only orders inside RAR_MIN_ORDER..RAR_MAX_ORDER (inclusive) are accepted.
  order_in_range = max_order.between?(RAR_MIN_ORDER, RAR_MAX_ORDER)

  # The model is a plain PPMd7 model built from the validated parameters.
  return Omnizip::Algorithms::PPMd7::Model.new(max_order, memory_size) if order_in_range

  raise ArgumentError,
        "RAR max_order must be between #{RAR_MIN_ORDER} and #{RAR_MAX_ORDER}"
end
|
|
135
|
+
|
|
136
|
+
# Decode a single symbol using RAR variant H
|
|
137
|
+
#
|
|
138
|
+
# RAR uses same basic decoding as PPMd7 but with
|
|
139
|
+
# different escape code handling.
|
|
140
|
+
#
|
|
141
|
+
# @return [Integer, nil] Decoded byte, or nil when no symbol matches the decoded value
|
|
142
|
+
def decode_symbol
  # Simplified scheme: read a raw 16-bit value and map it to a symbol.
  raw = @range_decoder.decode_direct_bits(16)
  byte = find_symbol_from_range(raw)

  # The model is only advanced when a symbol was actually found.
  @model.update(byte) unless byte.nil?
  byte
end
|
|
161
|
+
|
|
162
|
+
# Decode RAR-specific escape code
|
|
163
|
+
#
|
|
164
|
+
# RAR variant H uses different escape code values
|
|
165
|
+
# and handling compared to standard PPMd7.
|
|
166
|
+
#
|
|
167
|
+
# Escape codes in RAR:
|
|
168
|
+
# - 0: New symbol follows
|
|
169
|
+
# - 1: Same as last symbol (run-length)
|
|
170
|
+
# - 2-255: Reserved for future use
|
|
171
|
+
#
|
|
172
|
+
# @return [Integer] Escape code (the current placeholder always returns 0)
|
|
173
|
+
def decode_escape_code
  # Placeholder: nothing is read from the range decoder yet, so the
  # result is always code 0.
  0
end
|
|
181
|
+
|
|
182
|
+
# Find symbol from decoded range value (RAR variant)
|
|
183
|
+
#
|
|
184
|
+
# Uses RAR-specific probability distribution to map
|
|
185
|
+
# range value back to original symbol.
|
|
186
|
+
#
|
|
187
|
+
# @param value [Integer] Decoded range value
|
|
188
|
+
# @return [Integer, nil] The symbol
|
|
189
|
+
def find_symbol_from_range(value)
  root = @model.root_context
  lower_cum = 0

  # Walk the symbols in ascending order, giving each one a slice of the
  # 16-bit range proportional to its share of the context's total
  # frequency.
  root.states.sort_by { |sym, _entry| sym }.each do |sym, entry|
    upper_cum = lower_cum + entry.freq
    slice_start = (lower_cum * 0x10000) / root.total_freq
    slice_end = (upper_cum * 0x10000) / root.total_freq

    # Slices are half-open: [slice_start, slice_end).
    inside = value >= slice_start && value < slice_end
    return sym if inside

    lower_cum = upper_cum
  end

  # The value fell outside every symbol's slice.
  nil
end
|
|
214
|
+
end
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
end
|
|
218
|
+
end
|
|
219
|
+
end
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../../../../algorithms/ppmd7/encoder"
|
|
24
|
+
require_relative "../../../../algorithms/ppmd7/model"
|
|
25
|
+
require_relative "../../../../algorithms/lzma/range_encoder"
|
|
26
|
+
require_relative "context"
|
|
27
|
+
|
|
28
|
+
module Omnizip
|
|
29
|
+
module Formats
|
|
30
|
+
module Rar
|
|
31
|
+
module Compression
|
|
32
|
+
module PPMd
|
|
33
|
+
# RAR PPMd variant H encoder
|
|
34
|
+
#
|
|
35
|
+
# Implements encoding for RAR's PPMd variant H compression method.
|
|
36
|
+
# This adapts the standard PPMd7 algorithm for RAR-specific
|
|
37
|
+
# requirements:
|
|
38
|
+
#
|
|
39
|
+
# - Different memory model initialization
|
|
40
|
+
# - RAR-specific escape code handling
|
|
41
|
+
# - Modified context order selection
|
|
42
|
+
# - Different binary symbol encoding
|
|
43
|
+
#
|
|
44
|
+
# Responsibilities:
|
|
45
|
+
# - ONE responsibility: Encode data using RAR PPMd variant H
|
|
46
|
+
# - Manage encoder state and context
|
|
47
|
+
# - Transform original bytes to compressed bits
|
|
48
|
+
# - Maintain synchronized model state (matches decoder)
|
|
49
|
+
class Encoder < Omnizip::Algorithms::PPMd7::Encoder
|
|
50
|
+
# RAR variant H specific constants
|
|
51
|
+
RAR_MAX_ORDER = 16
|
|
52
|
+
RAR_MIN_ORDER = 2
|
|
53
|
+
RAR_DEFAULT_ORDER = 6
|
|
54
|
+
|
|
55
|
+
# RAR memory size multiplier (MB to bytes)
|
|
56
|
+
RAR_MEM_MULTIPLIER = 1024 * 1024
|
|
57
|
+
|
|
58
|
+
# Accessor for memory size (for testing)
|
|
59
|
+
def memory_size
  # Reads the model's @mem_size instance variable directly instead of
  # going through a public reader.
  @model.instance_variable_get("@mem_size")
end
|
|
62
|
+
|
|
63
|
+
# Initialize the RAR PPMd encoder
|
|
64
|
+
#
|
|
65
|
+
# @param output [IO] Output stream for compressed data
|
|
66
|
+
# @param options [Hash] Encoding options
|
|
67
|
+
# @option options [Integer] :model_order Maximum context order
|
|
68
|
+
# @option options [Integer] :mem_size Memory size in MB for RAR
|
|
69
|
+
def initialize(output, options = {})
  # The parent constructor is not invoked; all state is assigned here.
  @output = output
  @options = options

  # :mem_size is expressed in megabytes (16 when absent); the model is
  # given the size in bytes.
  memory_bytes = (options[:mem_size] || 16) * RAR_MEM_MULTIPLIER
  order = options[:model_order] || RAR_DEFAULT_ORDER
  @model = initialize_rar_model(order, memory_bytes)

  # Compressed bits are written to the output through the LZMA range encoder.
  @range_encoder = Omnizip::Algorithms::LZMA::RangeEncoder.new(output)
end
|
|
86
|
+
|
|
87
|
+
# Encode a stream to compressed bytes
|
|
88
|
+
#
|
|
89
|
+
# RAR variant H encoding process:
|
|
90
|
+
# 1. Read byte from input
|
|
91
|
+
# 2. Find symbol in current context
|
|
92
|
+
# 3. Encode using range coder with probabilities
|
|
93
|
+
# 4. Update model to stay synchronized with decoder
|
|
94
|
+
# 5. Handle RAR-specific escape codes if needed
|
|
95
|
+
#
|
|
96
|
+
# @param input [IO] Input stream to compress
|
|
97
|
+
# @param max_bytes [Integer, nil] Maximum bytes to encode
|
|
98
|
+
# @return [Integer] Number of bytes encoded
|
|
99
|
+
def encode_stream(input, max_bytes = nil)
  consumed = 0

  # Pull one byte at a time until the optional limit is reached or the
  # input reports end-of-data by returning nil.
  until max_bytes && consumed >= max_bytes
    chunk = input.read(1)
    break unless chunk

    encode_symbol(chunk.ord)
    consumed += 1
  end

  # The range encoder is always flushed, even when nothing was read.
  @range_encoder.flush
  consumed
end
|
|
116
|
+
|
|
117
|
+
private
|
|
118
|
+
|
|
119
|
+
# Initialize RAR variant H PPMd model
|
|
120
|
+
#
|
|
121
|
+
# RAR uses slightly different initialization than PPMd7:
|
|
122
|
+
# - Different context creation strategy
|
|
123
|
+
# - RAR-specific memory allocation
|
|
124
|
+
# - Modified root context initialization
|
|
125
|
+
#
|
|
126
|
+
# @param max_order [Integer] Maximum context order
|
|
127
|
+
# @param memory_size [Integer] Memory size in bytes
|
|
128
|
+
# @return [Omnizip::Algorithms::PPMd7::Model] Initialized model
|
|
129
|
+
def initialize_rar_model(max_order, memory_size)
  # Only orders inside RAR_MIN_ORDER..RAR_MAX_ORDER (inclusive) are accepted.
  order_in_range = max_order.between?(RAR_MIN_ORDER, RAR_MAX_ORDER)

  # The model is a plain PPMd7 model built from the validated parameters.
  return Omnizip::Algorithms::PPMd7::Model.new(max_order, memory_size) if order_in_range

  raise ArgumentError,
        "RAR max_order must be between #{RAR_MIN_ORDER} and #{RAR_MAX_ORDER}"
end
|
|
141
|
+
|
|
142
|
+
# Encode a single symbol using RAR variant H
|
|
143
|
+
#
|
|
144
|
+
# RAR uses same basic encoding as PPMd7 but with
|
|
145
|
+
# different escape code handling and probability calculation.
|
|
146
|
+
#
|
|
147
|
+
# Process:
|
|
148
|
+
# 1. Get current context
|
|
149
|
+
# 2. Check if symbol exists in context
|
|
150
|
+
# 3. If yes: encode using frequency information
|
|
151
|
+
# 4. If no: encode escape + new symbol
|
|
152
|
+
# 5. Update model state
|
|
153
|
+
#
|
|
154
|
+
# @param byte [Integer] Byte value to encode (0-255)
|
|
155
|
+
# @return [void]
|
|
156
|
+
def encode_symbol(byte)
  ctx = @model.current_context

  if (known = ctx.find_symbol(byte))
    # The context already tracks this byte: code it from its frequency.
    encode_symbol_in_context(byte, known, ctx)
  else
    # Unseen byte: emit the escape marker first, then the byte itself.
    encode_escape_code
    encode_new_symbol(byte)
  end

  # Advance the model with the byte that was just written.
  @model.update(byte)
end
|
|
175
|
+
|
|
176
|
+
# Encode symbol that exists in current context
|
|
177
|
+
#
|
|
178
|
+
# Uses the frequency information from the context to
|
|
179
|
+
# calculate probability range for range encoder.
|
|
180
|
+
#
|
|
181
|
+
# @param byte [Integer] Symbol to encode
|
|
182
|
+
# @param state [SymbolState] Symbol's state
|
|
183
|
+
# @param context [Context] Current context
|
|
184
|
+
# @return [void]
|
|
185
|
+
def encode_symbol_in_context(byte, state, context)
  # Lower bound of the symbol's range: the combined frequency of every
  # symbol whose value is below `byte`.
  #
  # The states hash iterates in insertion order, which is not guaranteed
  # to be ascending, so every entry is checked rather than stopping at the
  # first key >= byte. This lines up with the decoder's lookup, which walks
  # the symbols in sorted order.
  cum_freq = context.states.sum { |sym, entry| sym < byte ? entry.freq : 0 }

  # Hand the cumulative frequency, the symbol's own frequency and the
  # context total to the range-writing step.
  encode_range(cum_freq, state.freq, context.total_freq)
end
|
|
201
|
+
|
|
202
|
+
# Encode RAR-specific escape code
|
|
203
|
+
#
|
|
204
|
+
# RAR variant H uses different escape code values
|
|
205
|
+
# and handling compared to standard PPMd7.
|
|
206
|
+
#
|
|
207
|
+
# Escape codes in RAR:
|
|
208
|
+
# - 0: New symbol follows
|
|
209
|
+
# - 1: Same as last symbol (run-length)
|
|
210
|
+
# - 2-255: Reserved for future use
|
|
211
|
+
#
|
|
212
|
+
# @return [void]
|
|
213
|
+
def encode_escape_code
  # Placeholder: the escape marker is the fixed value 0 written as two
  # raw bits; the context's escape frequency is not consulted.
  escape_value = 0
  bit_count = 2
  @range_encoder.encode_direct_bits(escape_value, bit_count)
end
|
|
221
|
+
|
|
222
|
+
# Encode new symbol not in current context
|
|
223
|
+
#
|
|
224
|
+
# When a symbol doesn't exist in the current context,
|
|
225
|
+
# encode it using uniform distribution (all symbols
|
|
226
|
+
# equally likely).
|
|
227
|
+
#
|
|
228
|
+
# @param byte [Integer] Symbol to encode
|
|
229
|
+
# @return [void]
|
|
230
|
+
def encode_new_symbol(byte)
  # The byte is written verbatim as eight raw bits, i.e. every value is
  # treated as equally likely.
  bit_count = 8
  @range_encoder.encode_direct_bits(byte, bit_count)
end
|
|
234
|
+
|
|
235
|
+
# Encode a range for the symbol
|
|
236
|
+
#
|
|
237
|
+
# Converts frequency information to range and encodes
|
|
238
|
+
# using the range encoder.
|
|
239
|
+
#
|
|
240
|
+
# @param cum_freq [Integer] Cumulative frequency
|
|
241
|
+
# @param freq [Integer] Symbol frequency
|
|
242
|
+
# @param total_freq [Integer] Total frequency
|
|
243
|
+
# @return [void]
|
|
244
|
+
def encode_range(cum_freq, freq, total_freq)
  # Map the cumulative frequency onto a 16-bit scale (0x10000 steps) to
  # get the lower bound of the symbol's range.
  low = (cum_freq * 0x10000) / total_freq

  # Only the lower bound is written, as 16 raw bits. The upper bound
  # (derived from `freq`) used to be computed here and then discarded;
  # that dead expression is gone. `freq` stays in the signature so
  # existing callers keep working.
  @range_encoder.encode_direct_bits(low, 16)
end
|
|
257
|
+
end
|
|
258
|
+
end
|
|
259
|
+
end
|
|
260
|
+
end
|
|
261
|
+
end
|
|
262
|
+
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Formats
|
|
5
|
+
module Rar
|
|
6
|
+
# Registry for RAR compression methods
|
|
7
|
+
#
|
|
8
|
+
# This class manages the registration and retrieval of compression
|
|
9
|
+
# methods for RAR archives. It follows the Registry pattern to allow
|
|
10
|
+
# dynamic addition of compression methods without modifying core code.
|
|
11
|
+
#
|
|
12
|
+
# @example Registering a compression method
|
|
13
|
+
# CompressionMethodRegistry.register(
|
|
14
|
+
# :rar3_normal,
|
|
15
|
+
# Rar3::Compressor,
|
|
16
|
+
# Rar3::Decompressor
|
|
17
|
+
# )
|
|
18
|
+
#
|
|
19
|
+
# @example Getting a compressor
|
|
20
|
+
# compressor = CompressionMethodRegistry.compressor(:rar3_normal)
|
|
21
|
+
class CompressionMethodRegistry
  class << self
    # Register a compression method, replacing any entry already stored
    # under the same name.
    #
    # @param name [Symbol] The method name
    # @param compressor [Class] The compressor class
    # @param decompressor [Class] The decompressor class
    # @return [void]
    def register(name, compressor, decompressor)
      registry[name] = {
        compressor: compressor,
        decompressor: decompressor,
      }
    end

    # Get a compressor for a method
    #
    # @param name [Symbol] The method name
    # @return [Class] The compressor class
    # @raise [Error::FormatError] If method not registered
    def compressor(name)
      method_data = registry[name]
      return method_data[:compressor] if method_data

      raise Error::FormatError,
            "No compressor registered for method: #{name}"
    end

    # Get a decompressor for a method
    #
    # @param name [Symbol] The method name
    # @return [Class] The decompressor class
    # @raise [Error::FormatError] If method not registered
    def decompressor(name)
      method_data = registry[name]
      return method_data[:decompressor] if method_data

      raise Error::FormatError,
            "No decompressor registered for method: #{name}"
    end

    # Check if a method is registered
    #
    # @param name [Symbol] The method name
    # @return [Boolean] True if registered
    def registered?(name)
      registry.key?(name)
    end

    # Get all registered method names
    #
    # @return [Array<Symbol>] The registered method names
    def registered_methods
      registry.keys
    end

    # Clear all registered methods (primarily for testing)
    #
    # @return [void]
    def clear
      @registry = {}
    end

    # Get a compression method name for a RAR version and level
    #
    # Versions starting with "5" map to the "rar5" prefix; every other
    # version maps to "rar3".
    #
    # @param version [String] The RAR version (e.g., "3.0", "5.0")
    # @param level [Symbol] The compression level
    # @return [Symbol] The method name
    def method_for_version(version, level)
      prefix = version.start_with?("5") ? "rar5" : "rar3"
      :"#{prefix}_#{level}"
    end

    private

    # Storage for registered methods.
    #
    # Deliberately not named `methods`: a singleton method with that name
    # would shadow Object#methods and break reflection on this class.
    #
    # @return [Hash] Method name => { compressor:, decompressor: }
    def registry
      @registry ||= {}
    end
  end
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|