omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Formats
|
|
5
|
+
module Rar
|
|
6
|
+
module Compression
|
|
7
|
+
# Algorithm dispatcher for RAR compression
|
|
8
|
+
#
|
|
9
|
+
# Selects appropriate compression algorithm based on RAR method
|
|
10
|
+
# and dispatches to correct encoder/decoder.
|
|
11
|
+
#
|
|
12
|
+
# Responsibilities:
|
|
13
|
+
# - Algorithm selection based on compression method
|
|
14
|
+
# - Dispatch to appropriate decoder/encoder
|
|
15
|
+
# - Error handling for unsupported/unknown methods
|
|
16
|
+
#
|
|
17
|
+
# Note: Does NOT perform actual compression/decompression
|
|
18
|
+
# (delegated to decoder/encoder classes)
|
|
19
|
+
class Dispatcher
|
|
20
|
+
# RAR compression methods
|
|
21
|
+
METHOD_STORE = 0x30 # No compression
|
|
22
|
+
METHOD_FASTEST = 0x31 # LZ77+Huffman (fast)
|
|
23
|
+
METHOD_FAST = 0x32 # LZ77+Huffman
|
|
24
|
+
METHOD_NORMAL = 0x33 # LZ77+Huffman (default)
|
|
25
|
+
METHOD_GOOD = 0x34 # LZ77+Huffman or PPMd
|
|
26
|
+
METHOD_BEST = 0x35 # PPMd
|
|
27
|
+
|
|
28
|
+
# Custom errors
|
|
29
|
+
# Raised by +decompress+ and +compress+ when the method code matches
# none of the METHOD_* constants.
class UnsupportedMethodError < StandardError
end

# Raised by +decompress+ to report a failure that occurred while
# decoding; its message is prefixed with "Decompression failed: ".
class DecompressionError < StandardError
end

# Raised by +compress+ to report a failure that occurred while
# encoding; its message is prefixed with "Compression failed: ".
class CompressionError < StandardError
end
|
|
32
|
+
|
|
33
|
+
class << self
|
|
34
|
+
# Decompress data using appropriate algorithm
|
|
35
|
+
#
|
|
36
|
+
# @param method [Integer] RAR compression method (0x30-0x35)
|
|
37
|
+
# @param input [IO] Input stream
|
|
38
|
+
# @param output [IO] Output stream
|
|
39
|
+
# @param options [Hash] Decoder options
|
|
40
|
+
# @raise [UnsupportedMethodError] if method unknown
|
|
41
|
+
# @raise [DecompressionError] if decompression fails
|
|
42
|
+
def decompress(method, input, output, options = {})
  # Routes +input+ to the decoder selected by +method+; the chosen
  # helper is responsible for writing the decoded bytes to +output+.
  case method
  when METHOD_STORE
    decompress_store(input, output)
  when METHOD_FASTEST, METHOD_FAST, METHOD_NORMAL
    decompress_lz77_huffman(input, output, options)
  when METHOD_GOOD
    decompress_good(input, output, options)
  when METHOD_BEST
    decompress_ppmd(input, output, options)
  else
    # Only Integer#to_s accepts a radix. For nil, Symbols, Strings, ...
    # `to_s(16)` raises ArgumentError, which used to be rescued below
    # and reported as DecompressionError instead of the documented
    # UnsupportedMethodError. Fall back to #inspect for those values.
    label = if method.is_a?(Integer)
              "0x#{method.to_s(16).upcase}"
            else
              method.inspect
            end
    raise UnsupportedMethodError, "Unknown compression method: #{label}"
  end
rescue UnsupportedMethodError, DecompressionError
  # Already one of the documented error types: propagate unchanged
  # instead of wrapping it again (which would double the
  # "Decompression failed: " prefix).
  raise
rescue StandardError => e
  # Every other failure is surfaced as DecompressionError. Ruby keeps
  # the original exception reachable through Exception#cause.
  raise DecompressionError, "Decompression failed: #{e.message}"
end
|
|
64
|
+
|
|
65
|
+
# Compress data using appropriate algorithm
|
|
66
|
+
#
|
|
67
|
+
# @param method [Integer] RAR compression method
|
|
68
|
+
# @param input [IO] Input stream
|
|
69
|
+
# @param output [IO] Output stream
|
|
70
|
+
# @param options [Hash] Encoder options
|
|
71
|
+
# @raise [UnsupportedMethodError] if method unknown
|
|
72
|
+
# @raise [CompressionError] if compression fails
|
|
73
|
+
# @raise [NotImplementedError] for methods not yet implemented
|
|
74
|
+
def compress(method, input, output, options = {})
  # Routes +input+ to the encoder selected by +method+; the chosen
  # helper is responsible for writing the encoded bytes to +output+.
  case method
  when METHOD_STORE
    compress_store(input, output)
  when METHOD_FASTEST, METHOD_FAST, METHOD_NORMAL
    compress_lz77_huffman(input, output, options)
  when METHOD_GOOD
    compress_good(input, output, options)
  when METHOD_BEST
    compress_ppmd(input, output, options)
  else
    # Only Integer#to_s accepts a radix. For nil, Symbols, Strings, ...
    # `to_s(16)` raises ArgumentError, which used to be rescued below
    # and reported as CompressionError instead of the documented
    # UnsupportedMethodError. Fall back to #inspect for those values.
    label = if method.is_a?(Integer)
              "0x#{method.to_s(16).upcase}"
            else
              method.inspect
            end
    raise UnsupportedMethodError, "Unknown compression method: #{label}"
  end
rescue UnsupportedMethodError, CompressionError, NotImplementedError
  # Documented error types propagate unchanged. Core NotImplementedError
  # is a ScriptError and would bypass `rescue StandardError` anyway; it
  # is listed explicitly so that a namespaced override deriving from
  # StandardError (if the project defines one) is still passed through.
  raise
rescue StandardError => e
  # Every other failure is surfaced as CompressionError. Ruby keeps the
  # original exception reachable through Exception#cause.
  raise CompressionError, "Compression failed: #{e.message}"
end
|
|
94
|
+
|
|
95
|
+
private
|
|
96
|
+
|
|
97
|
+
# Decompress METHOD_STORE (no compression)
|
|
98
|
+
#
|
|
99
|
+
# @param input [IO] Input stream
|
|
100
|
+
# @param output [IO] Output stream
|
|
101
|
+
def decompress_store(input, output)
  # Stored entries carry no encoding, so the payload is streamed from
  # +input+ to +output+ verbatim. The leading :: pins the lookup to
  # Ruby's core IO class (presumably needed because the gem ships its
  # own io/ namespace -- confirm).
  ::IO.copy_stream(input, output)
end
|
|
105
|
+
|
|
106
|
+
# Run the LZ77+Huffman decoder over +input+ and write the decoded bytes
# to +output+. The decoder file is loaded on first use.
#
# @param input [IO] Input stream
# @param output [IO] Output stream
# @param options [Hash] Decoder options
def decompress_lz77_huffman(input, output, options)
  require_relative "lz77_huffman/decoder"

  output.write(LZ77Huffman::Decoder.new(input, options).decode)
end
|
|
118
|
+
|
|
119
|
+
# Decompress METHOD_GOOD (adaptive) data.
#
# No content-based algorithm selection exists yet, so this currently
# delegates straight to the LZ77+Huffman path.
#
# @param input [IO] Input stream
# @param output [IO] Output stream
# @param options [Hash] Decoder options
def decompress_good(input, output, options)
  # TODO: Implement content-based algorithm selection
  decompress_lz77_huffman(input, output, options)
end
|
|
132
|
+
|
|
133
|
+
# Run the PPMd decoder over +input+ and write the decoded bytes to
# +output+. The decoder file is loaded on first use.
#
# @param input [IO] Input stream
# @param output [IO] Output stream
# @param options [Hash] Decoder options
def decompress_ppmd(input, output, options)
  require_relative "ppmd/decoder"

  ppmd = PPMd::Decoder.new(input, options)
  output.write(ppmd.decode_stream)
end
|
|
145
|
+
|
|
146
|
+
# Handle METHOD_STORE on the write side: nothing is compressed, the
# bytes of +input+ are streamed to +output+ unchanged.
#
# @param input [IO] Input stream
# @param output [IO] Output stream
# @return [Integer] number of bytes copied, as reported by IO.copy_stream
def compress_store(input, output)
  ::IO.copy_stream(input, output)
end
|
|
154
|
+
|
|
155
|
+
# Feed +input+ through the LZ77+Huffman encoder, which writes to
# +output+. The encoder file is loaded on first use.
#
# @param input [IO] Input stream
# @param output [IO] Output stream
# @param options [Hash] Encoder options
def compress_lz77_huffman(input, output, options)
  require_relative "lz77_huffman/encoder"

  LZ77Huffman::Encoder.new(output, options).encode(input)
end
|
|
166
|
+
|
|
167
|
+
# Compress with METHOD_GOOD (adaptive).
#
# No content-based algorithm selection exists yet, so this currently
# delegates straight to the LZ77+Huffman path.
#
# @param input [IO] Input stream
# @param output [IO] Output stream
# @param options [Hash] Encoder options
def compress_good(input, output, options)
  # TODO: Implement content-based algorithm selection
  compress_lz77_huffman(input, output, options)
end
|
|
180
|
+
|
|
181
|
+
# Feed +input+ through the PPMd encoder, which writes to +output+.
# The encoder file is loaded on first use.
#
# @param input [IO] Input stream
# @param output [IO] Output stream
# @param options [Hash] Encoder options
def compress_ppmd(input, output, options)
  require_relative "ppmd/encoder"

  PPMd::Encoder.new(output, options).encode_stream(input)
end
|
|
192
|
+
|
|
193
|
+
# Look up the decoder class responsible for +method+ (for testing).
# The matching decoder file is loaded on demand.
#
# @param method [Integer] Compression method
# @return [Class, nil] Decoder class or nil for METHOD_STORE
# @raise [UnsupportedMethodError] if method unknown
def select_decoder(method)
  case method
  when METHOD_STORE then nil
  when METHOD_FASTEST, METHOD_FAST, METHOD_NORMAL, METHOD_GOOD
    require_relative "lz77_huffman/decoder"
    LZ77Huffman::Decoder
  when METHOD_BEST
    require_relative "ppmd/decoder"
    PPMd::Decoder
  else
    raise UnsupportedMethodError,
          "Unknown compression method: 0x#{method.to_s(16).upcase}"
  end
end
|
|
212
|
+
end
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
end
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../bit_stream"
|
|
24
|
+
require_relative "sliding_window"
|
|
25
|
+
require_relative "huffman_coder"
|
|
26
|
+
|
|
27
|
+
module Omnizip
  module Formats
    module Rar
      module Compression
        module LZ77Huffman
          # RAR LZ77+Huffman decoder
          #
          # Orchestrates the decoding of RAR METHOD_NORMAL compressed data.
          # Combines Huffman coding with LZ77 sliding window compression.
          #
          # Responsibilities:
          # - ONE responsibility: Orchestrate LZ77+Huffman decoding
          # - Parse Huffman trees from bit stream
          # - Decode symbols using Huffman coder
          # - Process LZ77 matches via sliding window
          # - Manage decoder state and output
          #
          # RAR LZ77+Huffman Format:
          # 1. Block header with Huffman tree definitions
          # 2. Compressed data stream
          # 3. Symbols: literals (0-255), matches (length+distance), end marker
          class Decoder
            # Symbol ranges
            LITERAL_SYMBOLS = (0..255)
            END_OF_BLOCK = 256
            MATCH_SYMBOLS = (257..511)

            # Match parameters
            MIN_MATCH_LENGTH = 3
            MAX_MATCH_LENGTH = 257

            # Window size for RAR4
            DEFAULT_WINDOW_SIZE = 64 * 1024

            # Initialize LZ77+Huffman decoder
            #
            # @param input [IO] Compressed input stream
            # @param options [Hash] Decoding options
            # @option options [Integer] :window_size Window size in bytes
            def initialize(input, options = {})
              @bit_stream = BitStream.new(input, :read)
              @window = SlidingWindow.new(options[:window_size] || DEFAULT_WINDOW_SIZE)
              @huffman = HuffmanCoder.new
              @output = String.new(encoding: Encoding::BINARY)
            end

            # Decode compressed data
            #
            # Main decoding loop:
            # 1. Parse Huffman tree (simplified for MVP)
            # 2. Decode symbols until end-of-block
            # 3. Process literals and matches
            #
            # An EOFError raised while reading ends decoding early and
            # whatever was decoded so far is returned.
            #
            # @param max_output [Integer, nil] Maximum output bytes; the
            #   returned data never exceeds this many bytes
            # @return [String] Decoded data
            def decode(max_output = nil)
              @output.clear

              # Parse Huffman tree (simplified - real RAR has complex structure)
              parse_huffman_trees

              # Decode symbols until end-of-block or max output
              loop do
                break if max_output && @output.bytesize >= max_output

                symbol = @huffman.decode_symbol(@bit_stream)
                break if symbol.nil? || symbol == END_OF_BLOCK

                process_symbol(symbol)
              end

              limited_output(max_output)
            rescue EOFError
              limited_output(max_output)
            end

            # Get window size
            #
            # @return [Integer] Window size in bytes
            def window_size
              @window.size
            end

            private

            # Return the output buffer, trimmed to +max_output+ bytes.
            #
            # The limit is only checked between symbols, so a match that
            # starts below the limit can push the buffer past it; the excess
            # bytes are dropped here.
            #
            # @param max_output [Integer, nil] Maximum output bytes
            # @return [String] Output buffer
            def limited_output(max_output)
              # @output is binary, so character indices are byte indices.
              @output.slice!(max_output..-1) if max_output && @output.bytesize > max_output
              @output
            end

            # Parse Huffman trees from bit stream
            #
            # RAR uses multiple Huffman tables for different symbol types.
            # This is a simplified implementation for MVP.
            #
            # Simplified format (written by Encoder):
            # 1. 16-bit number of symbols (always 512 for MVP)
            # 2. Code lengths (4 bits each, 512 x 4 bits = 2048 bits = 256 bytes)
            #
            # Real RAR format:
            # - MC table: Main code (literals + length codes)
            # - LD table: Low distance bits
            # - RC table: Repeat codes
            # - LDD table: Low distance for distance codes
            #
            # @return [void]
            def parse_huffman_trees
              # Read number of symbols from encoder (16-bit header)
              num_symbols = @bit_stream.read_bits(16)

              # Parse tree structure
              @huffman.parse_tree(@bit_stream, num_symbols)
            end

            # Process a decoded symbol
            #
            # Symbol types:
            # - 0-255: Literal byte
            # - 256: End of block (handled by the caller)
            # - 257-511: Match (length+distance)
            #
            # Symbols outside both ranges are ignored.
            #
            # @param symbol [Integer] Decoded symbol
            # @return [void]
            def process_symbol(symbol)
              if LITERAL_SYMBOLS.cover?(symbol)
                process_literal(symbol)
              elsif MATCH_SYMBOLS.cover?(symbol)
                process_match(symbol)
              end
            end

            # Process literal byte
            #
            # @param byte [Integer] Literal byte value (0-255)
            # @return [void]
            def process_literal(byte)
              @output << byte.chr
              @window.add_byte(byte)
            end

            # Process LZ77 match
            #
            # The symbol carries the match length; the distance is read from
            # the bit stream right after it.
            #
            # @param symbol [Integer] Match symbol (257-511)
            # @return [void]
            def process_match(symbol)
              length = decode_match_length(symbol)
              distance = decode_match_distance

              # Copy match from window
              match_bytes = @window.copy_match(distance, length)
              match_bytes.each { |byte| @output << byte.chr }
            end

            # Decode match length from symbol
            #
            # RAR encodes length in the symbol itself plus extra bits.
            # This is simplified for MVP: the length is derived from the
            # symbol alone and capped at MAX_MATCH_LENGTH.
            #
            # @param symbol [Integer] Match symbol
            # @return [Integer] Match length
            def decode_match_length(symbol)
              base_length = symbol - 257 + MIN_MATCH_LENGTH

              # Could read extra bits here for longer lengths
              [base_length, MAX_MATCH_LENGTH].min
            end

            # Decode match distance
            #
            # Simplified: the distance is read as 16 direct bits. Real RAR
            # uses a separate Huffman table for distances plus extra bits.
            #
            # @return [Integer] Match distance
            def decode_match_distance
              distance_bits = 16 # Changed from 8 to 16 bits for 64KB window
              @bit_stream.read_bits(distance_bits)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../bit_stream"
|
|
4
|
+
require_relative "match_finder"
|
|
5
|
+
require_relative "huffman_builder"
|
|
6
|
+
|
|
7
|
+
module Omnizip
  module Formats
    module Rar
      module Compression
        module LZ77Huffman
          # RAR LZ77+Huffman encoder
          #
          # Implements compression using LZ77 string matching combined with
          # Huffman coding for symbol encoding.
          #
          # ## Simplified Huffman Tree Format (MVP)
          #
          # This implementation uses a simplified tree format for portability
          # and ease of implementation. The format differs from official RAR
          # but maintains full compatibility between encoder and decoder.
          #
          # ### Format Structure:
          # ```
          # [16-bit num_symbols] [code_lengths...]
          #      2 bytes          512 x 4 bits = 256 bytes
          # ```
          #
          # ### Details:
          # - **Header**: 16-bit number of symbols (always 512 for MVP)
          #   - 0-255: Literal bytes
          #   - 256: End-of-block marker
          #   - 257-511: LZ77 match symbols
          #
          # - **Code Lengths**: 4 bits per symbol x 512 symbols = 2048 bits
          #   - Each symbol gets a 4-bit code length (0-15)
          #   - Length 0 means symbol not used
          #   - Lengths build canonical Huffman tree
          #
          # ### Trade-offs:
          # - **Fixed Overhead**: 258 bytes (2 + 256) per block
          # - **Simplicity**: Easy to implement and debug
          # - **Portability**: Pure Ruby, no external dependencies
          # - **Compatibility**: Encoder/decoder use identical format
          #
          # ### Real RAR Format Differences:
          # Real RAR uses a more complex format with:
          # - RLE compression of code lengths
          # - Multiple Huffman tables (MC, LD, RC, LDD)
          # - Adaptive tree updates
          # - More efficient length encoding
          #
          # The simplified format is sufficient for MVP and can be upgraded
          # to full RAR format in future versions without breaking the API.
          #
          # @see Decoder for decoding implementation
          # @see HuffmanCoder for tree building
          # @see HuffmanBuilder for code generation
          class Encoder
            LITERAL_SYMBOLS = (0..255)
            END_OF_BLOCK = 256
            MATCH_SYMBOLS = (257..511)
            MIN_MATCH_LENGTH = 3
            MAX_MATCH_LENGTH = 257

            # @return [Integer] Bytes written to the output by the last
            #   #encode call (0 before any call)
            attr_reader :compressed_size

            # Initialize LZ77+Huffman encoder
            #
            # @param output [IO] Destination stream; must respond to #pos
            # @param _options [Hash] Encoder options (currently unused)
            def initialize(output, _options = {})
              @output = output
              @bit_stream = BitStream.new(output, :write)
              @match_finder = MatchFinder.new
              @huffman_builder = HuffmanBuilder.new
              @compressed_size = 0
            end

            # Compress +input+ and write the result to the output stream.
            #
            # Two passes: the input is first tokenised into literals and
            # matches while symbol frequencies are collected, then the
            # Huffman table and the coded symbols are written.
            #
            # @param input [String, IO] Data to compress; non-String input
            #   is read in full via #read
            # @return [Integer] Number of bytes written (0 for empty input,
            #   in which case nothing is written)
            def encode(input)
              data = input.is_a?(String) ? input : input.read
              return 0 if data.empty?

              start_pos = @output.pos
              items = collect_items(data)
              codes = @huffman_builder.generate_codes
              write_huffman_tree(codes)

              items.each do |item|
                if item[:type] == :literal
                  encode_literal(item[:value], codes)
                else
                  encode_match(item[:offset], item[:length], codes)
                end
              end

              encode_symbol(END_OF_BLOCK, codes)
              @bit_stream.flush
              @compressed_size = @output.pos - start_pos
            end

            private

            # Tokenise the input into literal and match items and register
            # every emitted symbol with the Huffman builder.
            #
            # @param data [String] Input data
            # @return [Array<Hash>] Items of the form
            #   { type: :literal, value: } or { type: :match, offset:, length: }
            def collect_items(data)
              # Work on bytes throughout: String#size counts characters, which
              # is smaller than the byte count for multibyte input. The byte
              # array is built once instead of once per loop iteration.
              bytes = data.bytes
              items = []
              position = 0

              while position < bytes.size
                match = @match_finder.find_match(bytes, position)

                if match && match.length >= MIN_MATCH_LENGTH
                  # A match symbol cannot express more than MAX_MATCH_LENGTH
                  # bytes; consume only what the symbol will encode so the
                  # remainder is picked up by the next iteration.
                  length = [match.length, MAX_MATCH_LENGTH].min
                  items << { type: :match, offset: match.offset, length: length }
                  @huffman_builder.add_symbol(encode_match_symbol(length))
                  position += length
                else
                  byte = bytes[position]
                  items << { type: :literal, value: byte }
                  @huffman_builder.add_symbol(byte)
                  position += 1
                end
              end

              @huffman_builder.add_symbol(END_OF_BLOCK)
              items
            end

            # Write the table header: the symbol count as 16 bits followed
            # by one 4-bit code length per symbol (0 for unused symbols).
            #
            # @param codes [Hash{Integer => Array(Integer, Integer)}]
            #   symbol => [code, length]
            # @return [void]
            def write_huffman_tree(codes)
              lengths = Array.new(512, 0)
              codes.each { |symbol, (_code, length)| lengths[symbol] = length }
              @bit_stream.write_bits(512, 16)
              lengths.each { |length| @bit_stream.write_bits(length, 4) }
            end

            # Emit the Huffman code for a literal byte.
            #
            # @param byte [Integer] Literal byte value
            # @param codes [Hash] symbol => [code, length]
            # @return [void]
            def encode_literal(byte, codes)
              encode_symbol(byte, codes)
            end

            # Emit a match: the length symbol followed by the raw offset.
            #
            # @param offset [Integer] Match offset as reported by the finder
            # @param length [Integer] Match length
            # @param codes [Hash] symbol => [code, length]
            # @return [void]
            def encode_match(offset, length, codes)
              match_symbol = encode_match_symbol(length)
              encode_symbol(match_symbol, codes)
              # NOTE(review): assumes offsets fit in 16 bits - confirm against MatchFinder.
              @bit_stream.write_bits(offset, 16) # Changed from 8 to 16 bits for 64KB window
            end

            # Map a match length to its symbol (257 for MIN_MATCH_LENGTH,
            # capped at 511).
            #
            # @param length [Integer] Match length
            # @return [Integer] Match symbol
            def encode_match_symbol(length)
              base_symbol = length - MIN_MATCH_LENGTH + 257
              [base_symbol, 511].min
            end

            # Write the code assigned to +symbol+; symbols without an
            # assigned code are skipped.
            #
            # @param symbol [Integer] Symbol to emit
            # @param codes [Hash] symbol => [code, length]
            # @return [void]
            def encode_symbol(symbol, codes)
              code, length = codes[symbol]
              return unless code && length

              @bit_stream.write_bits(code, length)
            end
          end
        end
      end
    end
  end
end
|