omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Formats
|
|
5
|
+
module Rar
|
|
6
|
+
module Compression
|
|
7
|
+
module LZ77Huffman
|
|
8
|
+
# Huffman tree builder for dynamic compression
|
|
9
|
+
#
|
|
10
|
+
# Builds canonical Huffman trees from symbol frequencies.
|
|
11
|
+
# Uses a priority queue (kept as a sorted array) to construct optimal trees.
|
|
12
|
+
#
|
|
13
|
+
# Responsibilities:
|
|
14
|
+
# - ONE responsibility: Build Huffman trees and generate codes
|
|
15
|
+
# - Collect symbol frequencies
|
|
16
|
+
# - Build optimal Huffman tree
|
|
17
|
+
# - Generate canonical Huffman codes
|
|
18
|
+
# - Calculate code lengths
|
|
19
|
+
class HuffmanBuilder
|
|
20
|
+
MAX_CODE_LENGTH = 15
|
|
21
|
+
|
|
22
|
+
# Tree node for Huffman tree construction
|
|
23
|
+
class Node
  attr_accessor :symbol, :frequency, :left, :right

  # Creates a childless node; children are attached afterwards through
  # the +left+ / +right+ writers.
  #
  # @param symbol [Object, nil] Symbol carried by the node
  # @param frequency [Integer] Weight of the node
  def initialize(symbol, frequency)
    @left = @right = nil
    @frequency = frequency
    @symbol = symbol
  end

  # @return [Boolean] true when neither child is set
  def leaf?
    [@left, @right].all?(&:nil?)
  end
end
|
|
37
|
+
|
|
38
|
+
attr_reader :frequencies
|
|
39
|
+
|
|
40
|
+
def initialize
  # Symbol => occurrence count; symbols never seen read as 0.
  @frequencies = Hash.new(0)
end
|
|
43
|
+
|
|
44
|
+
# Add symbol occurrence(s)
|
|
45
|
+
#
|
|
46
|
+
# @param symbol [Integer] Symbol value
|
|
47
|
+
# @param count [Integer] Number of occurrences
|
|
48
|
+
# @return [void]
|
|
49
|
+
def add_symbol(symbol, count = 1)
  # Read the running total, then store it back increased by +count+.
  @frequencies[symbol] = @frequencies[symbol] + count
end
|
|
52
|
+
|
|
53
|
+
# Build Huffman tree from frequencies
|
|
54
|
+
#
|
|
55
|
+
# Uses priority queue algorithm to build optimal tree.
|
|
56
|
+
# Returns root node of the tree.
|
|
57
|
+
#
|
|
58
|
+
# @return [Node, nil] Root node or nil if empty
|
|
59
|
+
def build_tree
  return nil if @frequencies.empty?

  leaves = @frequencies.map { |symbol, freq| Node.new(symbol, freq) }
  # A single symbol yields a tree that is just that one leaf.
  return leaves.first if leaves.size == 1

  queue = leaves.sort_by(&:frequency)

  # Each merge shrinks the queue by one node, so exactly size - 1 merges
  # leave the root as the only element.
  (leaves.size - 1).times do
    lighter, heavier = queue.shift(2)

    merged = Node.new(nil, lighter.frequency + heavier.frequency)
    merged.left = lighter
    merged.right = heavier

    # Re-queue the merged node at its sorted position.
    insert_into_heap(queue, merged)
  end

  queue.first
end
|
|
86
|
+
|
|
87
|
+
# Generate canonical Huffman codes
|
|
88
|
+
#
|
|
89
|
+
# Returns hash mapping symbols to [code, length] pairs.
|
|
90
|
+
# Codes are canonical (same-length codes are sequential).
|
|
91
|
+
#
|
|
92
|
+
# @return [Hash<Integer, Array(Integer, Integer)>] symbol => [code, length]
|
|
93
|
+
def generate_codes
  tree = build_tree
  return {} if tree.nil?

  # A lone symbol gets the one-bit code 0.
  return { tree.symbol => [0, 1] } if tree.leaf?

  # Measure each symbol's depth, then derive canonical codes from the depths.
  lengths_by_symbol = {}
  calculate_code_lengths(tree, 0, lengths_by_symbol)
  generate_canonical_codes(lengths_by_symbol)
end
|
|
109
|
+
|
|
110
|
+
# Get code lengths only (for header transmission)
|
|
111
|
+
#
|
|
112
|
+
# @return [Hash<Integer, Integer>] symbol => length
|
|
113
|
+
def code_lengths
  tree = build_tree

  if tree.nil?
    {}
  elsif tree.leaf?
    # A lone symbol is reported with a length of one bit.
    { tree.symbol => 1 }
  else
    {}.tap { |table| calculate_code_lengths(tree, 0, table) }
  end
end
|
|
125
|
+
|
|
126
|
+
# Reset builder
|
|
127
|
+
#
|
|
128
|
+
# @return [void]
|
|
129
|
+
def reset
  # Empties the table in place; the Hash object itself is kept.
  @frequencies.clear
end
|
|
132
|
+
|
|
133
|
+
# Check if empty
|
|
134
|
+
#
|
|
135
|
+
# @return [Boolean]
|
|
136
|
+
def empty?
  # True while no symbol has been recorded.
  @frequencies.size.zero?
end
|
|
139
|
+
|
|
140
|
+
# Get number of symbols
|
|
141
|
+
#
|
|
142
|
+
# @return [Integer]
|
|
143
|
+
def symbol_count
  # One entry per distinct symbol in the frequency table.
  @frequencies.length
end
|
|
146
|
+
|
|
147
|
+
private
|
|
148
|
+
|
|
149
|
+
# Insert node into heap maintaining sort order
|
|
150
|
+
#
|
|
151
|
+
# @param heap [Array<Node>] Heap array
|
|
152
|
+
# @param node [Node] Node to insert
|
|
153
|
+
# @return [void]
|
|
154
|
+
def insert_into_heap(heap, node)
  # Binary search for the first entry whose frequency is not below the
  # new node's; nil means every existing entry is lighter.
  slot = heap.bsearch_index { |entry| entry.frequency >= node.frequency }

  # Inserting at that slot (or at the end) keeps the array sorted and
  # places the new node ahead of entries with the same frequency.
  heap.insert(slot || heap.size, node)
end
|
|
160
|
+
|
|
161
|
+
# Calculate code lengths via tree traversal
|
|
162
|
+
#
|
|
163
|
+
# @param node [Node] Current node
|
|
164
|
+
# @param depth [Integer] Current depth
|
|
165
|
+
# @param lengths [Hash] Output hash
|
|
166
|
+
# @return [void]
|
|
167
|
+
def calculate_code_lengths(node, depth, lengths)
  return if node.nil?

  unless node.leaf?
    # Internal node: both children sit one level deeper; left is visited first.
    child_depth = depth + 1
    calculate_code_lengths(node.left, child_depth, lengths)
    return calculate_code_lengths(node.right, child_depth, lengths)
  end

  # Leaf: record its depth, capped at MAX_CODE_LENGTH.
  # NOTE(review): capping one leaf does not rebalance the other lengths, so a
  # tree deeper than MAX_CODE_LENGTH may yield lengths that are not a valid
  # prefix code - confirm whether callers guarantee this cannot happen.
  lengths[node.symbol] = depth > MAX_CODE_LENGTH ? MAX_CODE_LENGTH : depth
end
|
|
177
|
+
|
|
178
|
+
# Generate canonical codes from code lengths
|
|
179
|
+
#
|
|
180
|
+
# Canonical codes have the property that codes of the same
|
|
181
|
+
# length are sequential integers.
|
|
182
|
+
#
|
|
183
|
+
# @param code_lengths [Hash<Integer, Integer>] symbol => length
|
|
184
|
+
# @return [Hash<Integer, Array(Integer, Integer)>] symbol => [code, length]
|
|
185
|
+
def generate_canonical_codes(code_lengths)
  return {} if code_lengths.empty?

  slots = MAX_CODE_LENGTH + 1

  # How many symbols use each bit length.
  per_length = Array.new(slots, 0)
  code_lengths.each_value { |bits| per_length[bits] += 1 }

  # Smallest code value for each bit length: the previous length's range,
  # shifted left by one bit.
  next_code = Array.new(slots, 0)
  running = 0
  1.upto(MAX_CODE_LENGTH) do |bits|
    next_code[bits] = running
    running = (running + per_length[bits]) * 2
  end

  # Hand out consecutive codes, shortest lengths first and by symbol
  # within one length.
  ordered = code_lengths.sort_by { |sym, bits| [bits, sym] }
  ordered.each_with_object({}) do |(sym, bits), table|
    table[sym] = [next_code[bits], bits]
    next_code[bits] += 1
  end
end
|
|
212
|
+
end
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
end
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Formats
|
|
25
|
+
module Rar
|
|
26
|
+
module Compression
|
|
27
|
+
module LZ77Huffman
|
|
28
|
+
# Huffman coding for RAR LZ77+Huffman compression
|
|
29
|
+
#
|
|
30
|
+
# Implements canonical Huffman tree decoding for RAR archives.
|
|
31
|
+
# RAR uses multiple Huffman tables:
|
|
32
|
+
# - MC (Main Code): Literals and length codes
|
|
33
|
+
# - LD (Length-Distance): Distance codes
|
|
34
|
+
# - RC (Repeat Count): Run-length encoding
|
|
35
|
+
# - LDD (Low Distance): Low distance values
|
|
36
|
+
#
|
|
37
|
+
# Responsibilities:
|
|
38
|
+
# - ONE responsibility: Huffman tree operations
|
|
39
|
+
# - Build canonical Huffman trees from code lengths
|
|
40
|
+
# - Decode symbols using Huffman trees
|
|
41
|
+
# - Parse tree structure from bit stream
|
|
42
|
+
#
|
|
43
|
+
# Canonical Huffman Code Properties:
|
|
44
|
+
# - Codes of same length are sequential
|
|
45
|
+
# - Shorter codes have lower values
|
|
46
|
+
# - Deterministic tree construction from lengths
|
|
47
|
+
class HuffmanCoder
  # Maximum code length for RAR
  MAX_CODE_LENGTH = 15

  # Initialize an empty coder; call {#build_tree} or {#parse_tree}
  # before decoding or encoding.
  def initialize
    @decode_table = {}
    @encode_table = {}
    @code_lengths = []
  end

  # Build the canonical Huffman tables from code lengths
  #
  # The index of each entry is the symbol and the value is that symbol's
  # code length; a length of 0 means the symbol is unused. Codes are
  # assigned in canonical order (shorter lengths first, then by symbol).
  #
  # All lengths are checked before any state is replaced, so a rejected
  # input leaves the previously built tables intact.
  #
  # @param code_lengths [Array<Integer>] Code length for each symbol
  # @raise [ArgumentError] if a length is not an Integer in
  #   0..MAX_CODE_LENGTH
  # @return [void]
  def build_tree(code_lengths)
    validate_code_lengths(code_lengths)

    @code_lengths = code_lengths
    @decode_table = {}
    @encode_table = {}

    # Count codes of each length (unused symbols are not counted)
    length_counts = Array.new(MAX_CODE_LENGTH + 1, 0)
    code_lengths.each { |len| length_counts[len] += 1 if len.positive? }

    # First code of each length: previous length's range shifted by one bit
    next_codes = Array.new(MAX_CODE_LENGTH + 1, 0)
    code = 0
    (1..MAX_CODE_LENGTH).each do |len|
      next_codes[len] = code
      code = (code + length_counts[len]) << 1
    end

    # Assign consecutive codes to symbols and fill both lookup tables
    code_lengths.each_with_index do |len, symbol|
      next if len.zero?

      symbol_code = next_codes[len]
      next_codes[len] += 1

      @decode_table[table_key(symbol_code, len)] = symbol
      @encode_table[symbol] = [symbol_code, len]
    end
  end

  # Decode a single symbol from bit stream
  #
  # Reads one bit at a time (most significant bit of the code first)
  # until the bits read so far form a known code.
  #
  # @param bit_stream [#read_bit] Input bit stream
  # @return [Integer, nil] Decoded symbol; nil when no code matches within
  #   MAX_CODE_LENGTH bits or when the stream yields nil
  def decode_symbol(bit_stream)
    code = 0

    (1..MAX_CODE_LENGTH).each do |length|
      bit = bit_stream.read_bit
      # NOTE(review): assumes an exhausted stream may answer nil; a stream
      # that raises instead is unaffected by this guard.
      return nil if bit.nil?

      code = (code << 1) | bit
      key = table_key(code, length)
      return @decode_table[key] if @decode_table.key?(key)
    end

    # No valid code found
    nil
  end

  # Parse Huffman tree from RAR bit stream
  #
  # Simplified MVP format: one 4-bit code length per symbol, read in
  # symbol order (real RAR uses a run-length-encoded representation).
  #
  # @param bit_stream [#read_bits] Input bit stream
  # @param num_symbols [Integer] Number of symbols in alphabet
  # @raise [ArgumentError] if the stream yields a value that is not a
  #   valid code length
  # @return [void]
  def parse_tree(bit_stream, num_symbols)
    build_tree(Array.new(num_symbols) { bit_stream.read_bits(4) })
  end

  # Check if tree is empty
  #
  # @return [Boolean] True if no codes defined
  def empty?
    @decode_table.empty?
  end

  # Get number of symbols in tree
  #
  # @return [Integer] Number of symbols that have a code
  def symbol_count
    @decode_table.size
  end

  # Reset the coder
  #
  # @return [void]
  def reset
    @decode_table = {}
    @encode_table = {}
    @code_lengths = []
  end

  # Encode a symbol (for future encoder implementation)
  #
  # @param symbol [Integer] Symbol to encode
  # @return [Array(Integer, Integer), nil] [code, length], or nil when the
  #   symbol has no code
  def encode_symbol(symbol)
    # dup so callers cannot corrupt the table through the returned pair
    @encode_table[symbol]&.dup
  end

  private

  # Pack a (code, length) pair into one integer hash key; lengths never
  # exceed MAX_CODE_LENGTH, so 8 bits are enough for the length part.
  def table_key(code, length)
    (code << 8) | length
  end

  # Reject any length that cannot index the per-length tables.
  def validate_code_lengths(code_lengths)
    code_lengths.each_with_index do |len, symbol|
      next if len.is_a?(Integer) && len.between?(0, MAX_CODE_LENGTH)

      raise ArgumentError,
            "Code length for symbol #{symbol} must be an Integer in " \
            "0..#{MAX_CODE_LENGTH}, got #{len.inspect}"
    end
  end
end
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
end
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Formats
|
|
5
|
+
module Rar
|
|
6
|
+
module Compression
|
|
7
|
+
module LZ77Huffman
|
|
8
|
+
# LZ77 Match Finder for RAR compression
|
|
9
|
+
class MatchFinder
  # Longest match that will be reported, in bytes
  MAX_MATCH_LENGTH = 257
  # Shortest match worth reporting; also the number of bytes hashed
  MIN_MATCH_LENGTH = 3
  # Default maximum backward distance
  WINDOW_SIZE = 32768
  # Maximum positions kept per hash chain and examined per lookup
  MAX_CHAIN_LENGTH = 1024

  # A backward reference: copy +length+ bytes starting +offset+ bytes
  # before the current position.
  class Match
    attr_reader :offset, :length

    # @param offset [Integer] Backward distance to the match start
    # @param length [Integer] Number of matching bytes
    def initialize(offset, length)
      @offset = offset
      @length = length
    end

    # Two matches are equal when offset and length agree; any other
    # object (including nil) is simply unequal.
    def ==(other)
      other.is_a?(Match) && offset == other.offset && length == other.length
    end
    alias eql? ==

    # Kept consistent with #== so matches can be used as hash keys.
    def hash
      [offset, length].hash
    end
  end

  attr_reader :window_size, :max_match_length

  # @param window_size [Integer] Maximum backward distance of a match
  # @param max_match_length [Integer] Upper bound for match length
  #   (capped at MAX_MATCH_LENGTH)
  def initialize(window_size = WINDOW_SIZE,
                 max_match_length = MAX_MATCH_LENGTH)
    @window_size = window_size
    @max_match_length = [max_match_length, MAX_MATCH_LENGTH].min
    @hash_table = {}
    @last_indexed = -1
  end

  # Find the longest earlier occurrence of the bytes at +position+
  #
  # Every position before +position+ is indexed first; +position+ itself
  # is indexed after the search so it is available to later lookups.
  #
  # @param data [String, Array<Integer>] Input bytes
  # @param position [Integer] Byte position to match at
  # @return [Match, nil] Best match of at least MIN_MATCH_LENGTH bytes,
  #   or nil when none exists or fewer than MIN_MATCH_LENGTH bytes remain
  def find_match(data, position)
    return nil if byte_size(data) - position < MIN_MATCH_LENGTH

    ensure_indexed(data, position)

    candidates = @hash_table[hash_bytes(data, position)] || []
    best_match = find_best_among_candidates(data, position, candidates)

    # Goes through the same path as all other indexing, so the position
    # is recorded exactly once.
    ensure_indexed(data, position + 1)
    best_match
  end

  # Make +position+ available as a match candidate without searching
  #
  # Positions are always indexed in increasing order, so any earlier
  # position that has not been indexed yet is indexed as well; a position
  # that is already indexed is left alone.
  #
  # @param data [String, Array<Integer>] Input bytes
  # @param position [Integer] Byte position to index
  # @return [void]
  def update(data, position)
    return if position >= byte_size(data)

    ensure_indexed(data, position + 1)
    nil
  end

  # Forget everything indexed so far
  #
  # @return [void]
  def reset
    @hash_table.clear
    @last_indexed = -1
  end

  # @return [Integer] Number of distinct hash values currently indexed
  def hash_chain_count
    @hash_table.size
  end

  private

  # Index every not-yet-indexed position below +limit+, in order.
  def ensure_indexed(data, limit)
    size = byte_size(data)

    ((@last_indexed + 1)...limit).each do |pos|
      # Positions only grow, so once the tail is too short to hash
      # nothing further can be indexed.
      break if pos + MIN_MATCH_LENGTH > size

      chain = (@hash_table[hash_bytes(data, pos)] ||= [])
      chain << pos
      # Drop the oldest entry; lookups never examine more than this many.
      chain.shift if chain.size > MAX_CHAIN_LENGTH
    end

    # Never move backwards, otherwise positions would be indexed twice.
    @last_indexed = [@last_indexed, limit - 1].max
  end

  # Combine the three bytes at +position+ into one integer key
  # (0 when fewer than three bytes remain).
  def hash_bytes(data, position)
    return 0 if position + 2 >= byte_size(data)

    (byte_at(data, position) << 16) ^
      (byte_at(data, position + 1) << 8) ^
      byte_at(data, position + 2)
  end

  # Scan a chain from newest to oldest and keep the longest match.
  def find_best_among_candidates(data, position, candidates)
    best_length = MIN_MATCH_LENGTH - 1
    best_offset = 0
    checked = 0

    candidates.reverse_each do |candidate_pos|
      offset = position - candidate_pos
      # A position can never match itself or something after it.
      next unless offset.positive?
      break if offset > @window_size

      checked += 1
      break if checked > MAX_CHAIN_LENGTH

      length = match_length(data, position, candidate_pos)
      next unless length > best_length

      best_length = length
      best_offset = offset
      break if best_length >= @max_match_length
    end

    return nil if best_length < MIN_MATCH_LENGTH

    Match.new(best_offset, best_length)
  end

  # Count equal bytes starting at +pos1+ and +pos2+, bounded by the end
  # of the data and the configured maximum match length.
  def match_length(data, pos1, pos2)
    max_len = [byte_size(data) - pos1, @max_match_length].min
    length = 0
    while length < max_len &&
          byte_at(data, pos1 + length) == byte_at(data, pos2 + length)
      length += 1
    end
    length
  end

  # Byte count of the input; bytesize keeps String positions in bytes.
  def byte_size(data)
    data.is_a?(String) ? data.bytesize : data.size
  end

  # Single byte lookup that avoids materialising String#bytes per call.
  def byte_at(data, index)
    data.is_a?(String) ? data.getbyte(index) : data[index]
  end
end
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
end
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Formats
|
|
25
|
+
module Rar
|
|
26
|
+
module Compression
|
|
27
|
+
module LZ77Huffman
|
|
28
|
+
# Sliding window buffer for LZ77 compression
|
|
29
|
+
#
|
|
30
|
+
# Provides a circular buffer that stores previously decoded bytes
|
|
31
|
+
# for LZ77 match copying. The window allows looking back at
|
|
32
|
+
# previously decoded data to resolve distance-length match pairs.
|
|
33
|
+
#
|
|
34
|
+
# Responsibilities:
|
|
35
|
+
# - ONE responsibility: Window buffer management
|
|
36
|
+
# - Store decoded bytes in circular buffer
|
|
37
|
+
# - Copy matches from window offset
|
|
38
|
+
# - Handle window wrap-around
|
|
39
|
+
# - Efficient lookback for match resolution
|
|
40
|
+
#
|
|
41
|
+
# RAR LZ77 Window Sizes:
|
|
42
|
+
# - RAR3: 32KB (32 * 1024 bytes)
|
|
43
|
+
# - RAR4: 64KB (64 * 1024 bytes)
|
|
44
|
+
# - RAR5: Up to 1GB (dynamic)
|
|
45
|
+
class SlidingWindow
  # Default window size (64KB for RAR4)
  DEFAULT_SIZE = 64 * 1024

  # @return [Integer] Current write position (0 to size-1)
  attr_reader :position

  # @return [Integer] Window size in bytes
  attr_reader :size

  # Initialize a new sliding window
  #
  # The buffer starts out filled with zero bytes.
  #
  # @param size [Integer] Window size in bytes
  # @raise [ArgumentError] if size is not a positive Integer
  def initialize(size = DEFAULT_SIZE)
    unless size.is_a?(Integer) && size.positive?
      raise ArgumentError,
            "Window size must be positive"
    end

    @size = size
    @buffer = Array.new(size, 0)
    @position = 0
  end

  # Add a single byte to the window
  #
  # Stores the byte at the current position and advances.
  # When position reaches window size, it wraps around to 0.
  #
  # @param byte [Integer] Byte value (0-255)
  # @raise [ArgumentError] if byte is not an Integer in 0..255
  # @return [void]
  def add_byte(byte)
    # Integer check first: Floats and nil must not reach the buffer.
    unless byte.is_a?(Integer) && byte.between?(0, 255)
      raise ArgumentError, "Byte must be 0-255"
    end

    store(byte)
  end

  # Copy a match from the window
  #
  # Copies bytes from a backward offset (distance), appends each one to
  # the window as it is read, and returns them as an array. Because the
  # copy proceeds byte by byte, a match may overlap the bytes it is
  # producing (distance < length).
  #
  # @param distance [Integer] Backward offset (1 to window size)
  # @param length [Integer] Number of bytes to copy (1+)
  # @raise [ArgumentError] if distance or length is out of range
  # @return [Array<Integer>] Copied bytes
  def copy_match(distance, length)
    validate_match_params(distance, length)

    read_pos = (@position - distance) % @size

    Array.new(length) do
      byte = @buffer[read_pos]
      read_pos = (read_pos + 1) % @size
      # Bytes already in the buffer were validated when first stored.
      store(byte)
      byte
    end
  end

  # Get byte at specific offset from current position
  #
  # @param offset [Integer] Backward offset (1 to window size)
  # @raise [ArgumentError] if offset is not an Integer in 1..size
  # @return [Integer] Byte value at offset
  def get_byte_at_offset(offset)
    unless offset.is_a?(Integer) && offset.between?(1, @size)
      raise ArgumentError,
            "Offset must be 1 to #{@size}"
    end

    @buffer[(@position - offset) % @size]
  end

  # Reset window to initial state
  #
  # @return [void]
  def reset
    @buffer.fill(0)
    @position = 0
  end

  private

  # Write one byte at the current position and advance with wrap-around.
  def store(byte)
    @buffer[@position] = byte
    @position = (@position + 1) % @size
  end

  # Validate match parameters
  #
  # @param distance [Integer] Distance parameter
  # @param length [Integer] Length parameter
  # @raise [ArgumentError] if either value is not an Integer in range
  # @return [void]
  def validate_match_params(distance, length)
    unless distance.is_a?(Integer) && distance.between?(1, @size)
      raise ArgumentError, "Distance must be 1 to #{@size}"
    end

    unless length.is_a?(Integer) && length.positive?
      raise ArgumentError,
            "Length must be positive"
    end
  end
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
end
|