omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "constants"
|
|
24
|
+
require_relative "context"
|
|
25
|
+
|
|
26
|
+
module Omnizip
|
|
27
|
+
module Algorithms
|
|
28
|
+
class PPMd7 < Algorithm
|
|
29
|
+
# Core PPMd7 prediction model
|
|
30
|
+
#
|
|
31
|
+
# The model maintains a tree of contexts, where each context
|
|
32
|
+
# represents a sequence of symbols that have appeared in the
|
|
33
|
+
# input. The model predicts the next symbol based on the
|
|
34
|
+
# current context's statistics.
|
|
35
|
+
#
|
|
36
|
+
# The model uses Prediction by Partial Matching (PPM), which
|
|
37
|
+
# tries progressively shorter contexts until it finds one
|
|
38
|
+
# that has seen the current symbol before.
|
|
39
|
+
class Model
|
|
40
|
+
include Constants
|
|
41
|
+
|
|
42
|
+
attr_reader :max_order, :root_context, :current_context
|
|
43
|
+
|
|
44
|
+
# Initialize the PPMd7 model
|
|
45
|
+
#
|
|
46
|
+
# @param max_order [Integer] Maximum context order (2-16)
|
|
47
|
+
# @param mem_size [Integer] Memory size for context allocation
|
|
48
|
+
def initialize(max_order = DEFAULT_ORDER, mem_size = DEFAULT_MEM_SIZE)
  # Reject out-of-range settings before any state is created.
  validate_parameters(max_order, mem_size)

  @max_order, @mem_size = max_order, mem_size

  # The tree starts as a lone order -1 root, which is also the
  # context the model is currently positioned at.
  @current_context = @root_context = Context.new(-1, nil)

  # Starts empty; #reset clears it again.
  @context_history = []

  # Seed the root with one entry per symbol of the alphabet.
  initialize_root_context
end
|
|
64
|
+
|
|
65
|
+
# Get probability information for encoding/decoding a symbol
|
|
66
|
+
#
|
|
67
|
+
# Returns information needed by range coder:
|
|
68
|
+
# - cumulative frequency up to symbol
|
|
69
|
+
# - symbol frequency
|
|
70
|
+
# - total frequency
|
|
71
|
+
# - whether this is an escape
|
|
72
|
+
#
|
|
73
|
+
# @param symbol [Integer, nil] Symbol to encode (nil for decode)
|
|
74
|
+
# @return [Hash] Probability information
|
|
75
|
+
def get_symbol_probability(symbol = nil)
  located = find_context_with_symbol(symbol)
  entry = located && located.find_symbol(symbol)

  unless entry
    # No context holds the symbol: describe the escape instead,
    # falling back to the root when no context was located at all.
    fallback = located || @root_context
    return {
      context: fallback,
      cumulative_freq: escape_cumulative_frequency(fallback),
      freq: fallback.escape_freq,
      total_freq: fallback.total_freq,
      escape: true,
    }
  end

  # The symbol is present in the located context.
  {
    context: located,
    cumulative_freq: cumulative_frequency(located, symbol),
    freq: entry.freq,
    total_freq: located.total_freq,
    escape: false,
  }
end
|
|
100
|
+
|
|
101
|
+
# Update model after encoding/decoding a symbol
|
|
102
|
+
#
|
|
103
|
+
# This updates context statistics and creates new contexts
|
|
104
|
+
# as needed.
|
|
105
|
+
#
|
|
106
|
+
# @param symbol [Integer] The symbol that was encoded/decoded
|
|
107
|
+
# @return [void]
|
|
108
|
+
def update(symbol)
  active = @current_context

  # A symbol the active context already tracks gets its statistics updated;
  # an unseen symbol is registered instead.
  if active.find_symbol(symbol)
    active.update_symbol(symbol)
  else
    active.add_symbol(symbol)
  end

  # Advance the active context to account for the symbol just processed.
  update_current_context(symbol)
end
|
|
119
|
+
|
|
120
|
+
# Reset model to initial state
|
|
121
|
+
#
|
|
122
|
+
# @return [void]
|
|
123
|
+
def reset
  # Start over from a brand-new order -1 root; it is also the active context.
  fresh_root = Context.new(-1, nil)
  @root_context = fresh_root
  @current_context = fresh_root

  # Forget the symbols seen so far (the same array object is kept).
  @context_history.clear

  # Re-seed the new root with its initial symbol set.
  initialize_root_context
end
|
|
129
|
+
|
|
130
|
+
private
|
|
131
|
+
|
|
132
|
+
# Validate initialization parameters
|
|
133
|
+
#
|
|
134
|
+
# @param max_order [Integer] Maximum context order
|
|
135
|
+
# @param mem_size [Integer] Memory size
|
|
136
|
+
# @return [void]
|
|
137
|
+
# @raise [ArgumentError] If parameters are invalid
|
|
138
|
+
def validate_parameters(max_order, mem_size)
  # Both limits are inclusive (Comparable#between?).
  order_ok = max_order.between?(MIN_ORDER, MAX_ORDER)
  order_error = "max_order must be between #{MIN_ORDER} and #{MAX_ORDER}"
  raise ArgumentError, order_error unless order_ok

  mem_ok = mem_size.between?(MIN_MEM_SIZE, MAX_MEM_SIZE)
  mem_error = "mem_size must be between #{MIN_MEM_SIZE} and #{MAX_MEM_SIZE}"
  raise ArgumentError, mem_error unless mem_ok
end
|
|
150
|
+
|
|
151
|
+
# Initialize root context with all possible symbols
|
|
152
|
+
#
|
|
153
|
+
# The root context (order -1) contains all 256 possible
|
|
154
|
+
# byte values with equal frequency. This ensures we can
|
|
155
|
+
# always encode any symbol.
|
|
156
|
+
#
|
|
157
|
+
# @return [void]
|
|
158
|
+
def initialize_root_context
  root = @root_context
  # Register every value 0...ALPHABET_SIZE with frequency 1.
  ALPHABET_SIZE.times { |byte| root.add_symbol(byte, 1) }
end
|
|
163
|
+
|
|
164
|
+
# Find context that contains the given symbol
|
|
165
|
+
#
|
|
166
|
+
# Searches from current context up through suffixes until
|
|
167
|
+
# finding one that has seen this symbol, or reaching root.
|
|
168
|
+
#
|
|
169
|
+
# @param symbol [Integer, nil] Symbol to find
|
|
170
|
+
# @return [Context, nil] Context containing symbol or nil
|
|
171
|
+
def find_context_with_symbol(symbol)
  # Without a symbol there is nothing to look for.
  return if symbol.nil?

  # Follow suffix links from the active context until one knows the symbol
  # or the chain runs out (which leaves nil).
  node = @current_context
  node = node.suffix while node && !node.find_symbol(symbol)
  node
end
|
|
183
|
+
|
|
184
|
+
# Calculate cumulative frequency up to (but not including) symbol
|
|
185
|
+
#
|
|
186
|
+
# @param context [Context] The context
|
|
187
|
+
# @param symbol [Integer] The symbol
|
|
188
|
+
# @return [Integer] Cumulative frequency
|
|
189
|
+
def cumulative_frequency(context, symbol)
  # Add up the frequency of every state whose symbol value is lower than the
  # target, wherever it sits in the hash. Hash iteration follows insertion
  # order, so stopping at the first symbol >= the target only works when
  # symbols happened to be inserted in ascending order; otherwise different
  # symbols end up with the same cumulative frequency.
  context.states.sum { |sym, state| sym < symbol ? state.freq : 0 }
end
|
|
198
|
+
|
|
199
|
+
# Calculate cumulative frequency for escape symbol
|
|
200
|
+
#
|
|
201
|
+
# The escape symbol comes after all regular symbols in the
|
|
202
|
+
# frequency range.
|
|
203
|
+
#
|
|
204
|
+
# @param context [Context] The context
|
|
205
|
+
# @return [Integer] Cumulative frequency for escape
|
|
206
|
+
def escape_cumulative_frequency(context)
  # The escape slot begins at the context's sum_freq value.
  escape_start = context.sum_freq
  escape_start
end
|
|
209
|
+
|
|
210
|
+
# Update current context after processing a symbol
|
|
211
|
+
#
|
|
212
|
+
# This maintains the context chain, creating new contexts
|
|
213
|
+
# as needed to extend the order.
|
|
214
|
+
#
|
|
215
|
+
# @param symbol [Integer] Symbol that was processed
|
|
216
|
+
# @return [void]
|
|
217
|
+
def update_current_context(symbol)
  history = @context_history
  history << symbol

  # One push can overshoot the limit by at most one entry, so dropping the
  # oldest symbol keeps the history at @max_order entries or fewer.
  history.shift if history.size > @max_order

  # The active context is the one built for the trimmed history.
  @current_context = find_or_create_context(history)
end
|
|
227
|
+
|
|
228
|
+
# Find or create a context for the given symbol sequence
|
|
229
|
+
#
|
|
230
|
+
# @param sequence [Array<Integer>] Sequence of symbols
|
|
231
|
+
# @return [Context] The context for this sequence
|
|
232
|
+
def find_or_create_context(sequence)
  # NOTE(review): despite the name, nothing is looked up here. Every call
  # allocates a fresh chain of Context objects, one per entry in the
  # sequence, and the symbol values themselves are never consulted. Confirm
  # whether previously built contexts were meant to be reused.
  #
  # Chain shape: order 0 uses the root as its suffix, and each later order
  # uses the context created just before it. An empty sequence yields the
  # root context itself.
  sequence.each_index.reduce(@root_context) do |parent, order|
    Context.new(order, parent)
  end
end
|
|
245
|
+
end
|
|
246
|
+
end
|
|
247
|
+
end
|
|
248
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Algorithms
|
|
25
|
+
class PPMd7 < Algorithm
|
|
26
|
+
# Represents a symbol's state within a context
|
|
27
|
+
#
|
|
28
|
+
# Each symbol that appears after a context has an associated
|
|
29
|
+
# state that tracks its frequency. This is used to calculate
|
|
30
|
+
# the probability of each symbol appearing next.
|
|
31
|
+
class SymbolState
  attr_reader :symbol
  attr_accessor :freq

  # Create the state for one symbol.
  #
  # @param symbol [Integer] The symbol value (0-255)
  # @param freq [Integer] Initial frequency (default: 1)
  def initialize(symbol, freq = 1)
    @symbol = symbol
    @freq = freq
  end

  # Share of the context's total frequency held by this symbol.
  #
  # @param total_freq [Integer] Total frequency in context
  # @return [Float] Probability (0.0 to 1.0); 0.0 when total_freq is not
  #   positive
  def probability(total_freq)
    # Float division by zero yields Infinity or NaN instead of raising, and
    # a negative total yields a negative ratio, so report "no probability"
    # for a non-positive total.
    return 0.0 unless total_freq.positive?

    @freq.to_f / total_freq
  end
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../algorithm"
|
|
24
|
+
require_relative "ppmd7/constants"
|
|
25
|
+
require_relative "ppmd7/symbol_state"
|
|
26
|
+
require_relative "ppmd7/context"
|
|
27
|
+
require_relative "ppmd7/model"
|
|
28
|
+
require_relative "ppmd7/encoder"
|
|
29
|
+
require_relative "ppmd7/decoder"
|
|
30
|
+
|
|
31
|
+
module Omnizip
|
|
32
|
+
module Algorithms
|
|
33
|
+
# PPMd7 compression algorithm
|
|
34
|
+
#
|
|
35
|
+
# PPMd (Prediction by Partial Matching) is a statistical compression
|
|
36
|
+
# algorithm that excels at text compression. It uses context-based
|
|
37
|
+
# prediction to achieve high compression ratios on text files.
|
|
38
|
+
#
|
|
39
|
+
# This implementation follows the PPMd7 specification as used in 7-Zip.
|
|
40
|
+
class PPMd7 < Algorithm
|
|
41
|
+
include Constants
|
|
42
|
+
|
|
43
|
+
# Algorithm metadata
|
|
44
|
+
#
|
|
45
|
+
# @return [AlgorithmMetadata] Metadata describing this algorithm
|
|
46
|
+
def self.metadata
  # Build the descriptor step by step and hand it back.
  meta = Models::AlgorithmMetadata.new
  meta.name = "ppmd7"
  meta.description = "PPMd7 - Prediction by Partial Matching " \
                     "for statistical text compression"
  meta.version = "1.0.0"
  meta.supports_streaming = true
  meta
end
|
|
55
|
+
|
|
56
|
+
# Compress data using PPMd7
|
|
57
|
+
#
|
|
58
|
+
# @param input [IO, String] Input data to compress
|
|
59
|
+
# @param output [IO, String] Output for compressed data
|
|
60
|
+
# @param options [Hash] Compression options
|
|
61
|
+
# @option options [Integer] :model_order Context order (2-16)
|
|
62
|
+
# @option options [Integer] :mem_size Memory size
|
|
63
|
+
# @return [void]
|
|
64
|
+
def compress(input, output, options = {})
  # Normalise both ends first, then let the encoder consume the source and
  # write into the sink. The encoder's return value is passed through.
  source = prepare_input(input)
  sink = prepare_output(output)

  PPMd7::Encoder.new(sink, options).encode_stream(source)
end
|
|
71
|
+
|
|
72
|
+
# Decompress data using PPMd7
|
|
73
|
+
#
|
|
74
|
+
# @param input [IO, String] Compressed input data
|
|
75
|
+
# @param output [IO, String] Output for decompressed data
|
|
76
|
+
# @param options [Hash] Decompression options
|
|
77
|
+
# @option options [Integer] :model_order Context order (2-16)
|
|
78
|
+
# @option options [Integer] :mem_size Memory size
|
|
79
|
+
# @return [void]
|
|
80
|
+
def decompress(input, output, options = {})
  # Normalise both ends, decode the whole stream, then write the decoded
  # data to the sink in a single call. The write's return value is returned.
  source = prepare_input(input)
  sink = prepare_output(output)

  decoded = PPMd7::Decoder.new(source, options).decode_stream
  sink.write(decoded)
end
|
|
89
|
+
|
|
90
|
+
private
|
|
91
|
+
|
|
92
|
+
# Prepare input for processing
|
|
93
|
+
#
|
|
94
|
+
# @param input [IO, String] Input data
|
|
95
|
+
# @return [IO] IO object ready for reading
|
|
96
|
+
def prepare_input(input)
  # Accept anything readable, not just IO subclasses. StringIO and Tempfile
  # are not IO descendants; wrapping a StringIO via #to_s would yield its
  # inspect text instead of its contents.
  return input if input.respond_to?(:read)

  # Everything else (String, nil, ...) is read from an in-memory buffer.
  StringIO.new(input.to_s)
end
|
|
101
|
+
|
|
102
|
+
# Prepare output for processing
|
|
103
|
+
#
|
|
104
|
+
# @param output [IO, String, nil] Output destination
|
|
105
|
+
# @return [IO] IO object ready for writing
|
|
106
|
+
def prepare_output(output)
  # Accept anything writable, not just IO subclasses. StringIO and Tempfile
  # are not IO descendants, and replacing them with a private buffer would
  # discard everything written for the caller.
  return output if output.respond_to?(:write)

  # Strings and nil get a fresh binary in-memory buffer.
  StringIO.new(String.new(encoding: Encoding::BINARY))
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Register algorithm with registry
|
|
116
|
+
# Top-level call, so it runs whenever this file is loaded. Presumably this is
# what lets callers resolve the :ppmd7 key to the PPMd7 class — confirm
# against AlgorithmRegistry.
Omnizip::AlgorithmRegistry.register(:ppmd7, Omnizip::Algorithms::PPMd7)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Algorithms
|
|
25
|
+
class PPMd8 < PPMdBase
|
|
26
|
+
# Constants specific to PPMd8 algorithm
|
|
27
|
+
#
|
|
28
|
+
# PPMd8 adds restoration methods and enhanced memory management
|
|
29
|
+
# compared to PPMd7.
|
|
30
|
+
module Constants
  # --- Restoration methods ---
  RESTORE_METHOD_RESTART = 0
  RESTORE_METHOD_CUT_OFF = 1
  # Method used when the caller does not pick one.
  DEFAULT_RESTORE_METHOD = RESTORE_METHOD_RESTART

  # --- Probability scaling ---
  PROB_TOTAL = 2048
  MAX_FREQ = 124
  INIT_ESCAPE_FREQ = 1

  # --- Context management ---
  SEE_CONTEXTS = 25
  SUFFIX_CONTEXTS = 32

  # --- Update intervals ---
  INT_BITS = 7
  PERIOD_BITS = 7
  BIN_SCALE = 2**13
  # Derived from INT_BITS, so the two stay in step.
  INTERVAL = 2**INT_BITS

  # --- Memory management ---
  UNIT_SIZE = 12
  MAX_STATES = 256
  UNIT_ALLOC_SIZE = 12

  # --- PPMd8 only: glue counting threshold ---
  GLUE_COUNT_THRESHOLD = 255
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
|
|
5
|
+
require_relative "../ppmd7/context"
|
|
6
|
+
|
|
7
|
+
module Omnizip
|
|
8
|
+
module Algorithms
|
|
9
|
+
class PPMd8 < PPMdBase
|
|
10
|
+
# PPMd8 Context - Enhanced version with Union types
|
|
11
|
+
#
|
|
12
|
+
# Represents a context node in the PPMd8 model tree.
|
|
13
|
+
# PPMd8 uses optimized memory layout with Union types.
|
|
14
|
+
class Context < PPMd7::Context
  include Constants

  # NOTE(review): the sum_freq accessor defined here shadows any sum_freq
  # reader inherited from PPMd7::Context and starts at 0 — confirm that
  # callers set it explicitly.
  attr_accessor :num_stats, :flags, :sum_freq, :glue_count

  # Build the context through the PPMd7 constructor, then zero the
  # PPMd8-specific fields.
  #
  # @param order [Integer] Order handed to the parent constructor
  # @param suffix [Context, nil] Suffix handed to the parent constructor
  def initialize(order, suffix)
    super(order, suffix)
    @num_stats = @flags = @sum_freq = @glue_count = 0
  end

  # PPMd8-specific: whether the glue count has reached
  # GLUE_COUNT_THRESHOLD.
  #
  # @return [Boolean]
  def needs_restoration?
    @glue_count >= GLUE_COUNT_THRESHOLD
  end
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "model"
|
|
24
|
+
require_relative "../lzma/range_decoder"
|
|
25
|
+
|
|
26
|
+
module Omnizip
|
|
27
|
+
module Algorithms
|
|
28
|
+
class PPMd8 < PPMdBase
|
|
29
|
+
# PPMd8 Decoder
|
|
30
|
+
#
|
|
31
|
+
# Decodes streams compressed with PPMd8, maintaining
|
|
32
|
+
# synchronized model state with the encoder.
|
|
33
|
+
class Decoder
|
|
34
|
+
include PPMdBase::BaseConstants
|
|
35
|
+
include Constants
|
|
36
|
+
|
|
37
|
+
attr_reader :model
|
|
38
|
+
|
|
39
|
+
# Initialize the decoder
|
|
40
|
+
#
|
|
41
|
+
# @param input [IO] Input stream of compressed data
|
|
42
|
+
# @param options [Hash] Decoding options
|
|
43
|
+
# @option options [Integer] :model_order Maximum context order
|
|
44
|
+
# @option options [Integer] :mem_size Memory size for model
|
|
45
|
+
# @option options [Integer] :restore_method Restoration method
|
|
46
|
+
def initialize(input, options = {})
  @input = input

  # A missing (or nil/false) option falls back to its default constant.
  order = options[:model_order] || DEFAULT_ORDER
  mem_size = options[:mem_size] || DEFAULT_MEM_SIZE
  restore_method = options[:restore_method] || DEFAULT_RESTORE_METHOD

  @model = Model.new(order, mem_size, restore_method)
  @range_decoder = LZMA::RangeDecoder.new(input)
end
|
|
55
|
+
|
|
56
|
+
# Decode a stream back to original bytes
|
|
57
|
+
#
|
|
58
|
+
# @return [String] Decoded data
|
|
59
|
+
# @raise [NotImplementedError] PPMd8 is not yet fully implemented
|
|
60
|
+
def decode_stream
  # Decoding is not available yet: fail on every call and point the caller
  # at alternatives.
  reason = "PPMd8 decompression is not yet fully implemented. " \
           "The arithmetic coding integration requires completion. " \
           "Please use PPMd7 or other compression algorithms instead."
  raise NotImplementedError, reason
end
|
|
66
|
+
|
|
67
|
+
private
|
|
68
|
+
|
|
69
|
+
# Decode a single symbol
|
|
70
|
+
#
|
|
71
|
+
# @return [Integer, nil] Decoded byte or nil if end
|
|
72
|
+
def decode_symbol
  # Pull 16 bits from the range decoder and map them onto a symbol.
  decoded = find_symbol_from_range(@range_decoder.decode_direct_bits(16))

  # Only a successfully decoded symbol is fed back into the model; a nil
  # result is returned as-is.
  @model.update(decoded) unless decoded.nil?
  decoded
end
|
|
80
|
+
|
|
81
|
+
# Find symbol from decoded range value
|
|
82
|
+
#
|
|
83
|
+
# @param value [Integer] Decoded range value
|
|
84
|
+
# @return [Integer, nil] The symbol
|
|
85
|
+
def find_symbol_from_range(value)
  root = @model.root_context
  scale = 0x10000
  running = 0

  # Visit states in ascending symbol order, giving each one a slice of
  # 0...scale proportional to its frequency; the first slice containing
  # the value wins.
  root.states.sort_by { |sym, _state| sym }.each do |sym, state|
    upper = running + state.freq
    floor = (running * scale) / root.total_freq
    ceiling = (upper * scale) / root.total_freq

    return sym if value >= floor && value < ceiling

    running = upper
  end

  # The value fell outside every slice.
  nil
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|