omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "constants"
|
|
5
|
+
require_relative "huffman_coder"
|
|
6
|
+
|
|
7
|
+
module Omnizip
|
|
8
|
+
module Algorithms
|
|
9
|
+
class Deflate64
|
|
10
|
+
# Deflate64 decoder
|
|
11
|
+
class Decoder
|
|
12
|
+
include Constants
|
|
13
|
+
|
|
14
|
+
attr_reader :window_size
|
|
15
|
+
|
|
16
|
+
# Build a decoder that will read its compressed data from +input_stream+.
#
# The sliding window starts out empty and is capped at DICTIONARY_SIZE
# entries; a fresh HuffmanCoder is created for this decoder instance.
#
# @param input_stream [#read] Source of the compressed data
def initialize(input_stream)
  @window = []
  @window_size = DICTIONARY_SIZE
  @huffman = HuffmanCoder.new
  @input_stream = input_stream
end
|
|
22
|
+
|
|
23
|
+
# Decode everything available on the input stream and write the result
# to +output_stream+.
#
# The input is read in one go, split into the two serialized Huffman
# trees plus the encoded payload, Huffman-decoded into LZ77 tokens, and
# finally expanded back into bytes.
#
# @param output_stream [IO] Destination for the decompressed bytes
# @return [Object] Whatever +output_stream.write+ returns
def decompress(output_stream)
  literal_tree, distance_tree, payload =
    deserialize_with_trees(@input_stream.read)

  lz77_tokens = @huffman.decode(payload, literal_tree, distance_tree)
  output_stream.write(reconstruct_from_tokens(lz77_tokens))
end
|
|
40
|
+
|
|
41
|
+
# Split a serialized buffer into its Huffman trees and encoded payload.
#
# Layout read by this method:
#   bytes 0..3  literal tree JSON length  (32-bit big-endian)
#   bytes 4..7  distance tree JSON length (32-bit big-endian)
#   literal tree JSON, distance tree JSON, then the remaining payload
#
# @param data [String] Serialized compressed data
# @return [Array] Literal tree, distance tree, compressed data
# @raise [Omnizip::DecompressionError] if the buffer is shorter than the
#   header or than the tree sizes the header declares
def deserialize_with_trees(data)
  header_size = 8 # two 32-bit length fields

  # Without this guard unpack yields nils and the slicing below fails with
  # an unrelated TypeError.
  if data.nil? || data.bytesize < header_size
    raise Omnizip::DecompressionError,
          "Truncated Deflate64 data: missing #{header_size}-byte tree size header"
  end

  literal_size, distance_size = data.unpack("NN")

  literal_json = data[header_size, literal_size]
  distance_json = data[header_size + literal_size, distance_size]

  # String#[] returns nil (start past the end) or a short slice when the
  # declared sizes exceed what is actually present.
  if literal_json.nil? || literal_json.size < literal_size ||
     distance_json.nil? || distance_json.size < distance_size
    raise Omnizip::DecompressionError,
          "Truncated Deflate64 data: Huffman trees declare " \
          "#{literal_size + distance_size} bytes but input is too short"
  end

  # Everything after both trees is the Huffman-encoded payload.
  compressed = data[(header_size + literal_size + distance_size)..]

  # Parse trees from JSON with symbol keys as integers
  literal_tree = parse_tree_from_json(literal_json)
  distance_tree = parse_tree_from_json(distance_json)

  [literal_tree, distance_tree, compressed]
end
|
|
67
|
+
|
|
68
|
+
# Rebuild a Huffman tree hash from its JSON form.
#
# JSON object keys are always strings, so each key is converted back to
# an Integer with +to_i+; values are left untouched.
#
# @param json [String] JSON string
# @return [Hash] Huffman tree with integer keys
def parse_tree_from_json(json)
  JSON.parse(json).transform_keys { |key| key.to_i }
end
|
|
77
|
+
|
|
78
|
+
# Expand a sequence of LZ77 tokens back into the bytes they describe.
#
# A +:literal+ token contributes its +:value+ byte to both the output and
# the sliding window. A +:match+ token is expanded by copy_from_window
# using its +:distance+ and +:length+. Tokens of any other type are
# skipped. After each token the window is trimmed from the front so it
# never holds more than +@window_size+ entries.
#
# @param tokens [Array<Hash>] LZ77 tokens
# @return [String] Decompressed data (binary encoding)
def reconstruct_from_tokens(tokens)
  pieces = tokens.each_with_object([]) do |token, out|
    case token[:type]
    when :literal
      value = token[:value]
      out << value.chr(Encoding::BINARY)
      @window << value
    when :match
      copy_from_window(out, token[:distance], token[:length])
    end

    # Drop the oldest entries once the window exceeds its capacity.
    overflow = @window.size - @window_size
    @window.shift(overflow) if overflow.positive?
  end

  pieces.join.force_encoding(Encoding::BINARY)
end
|
|
106
|
+
|
|
107
|
+
# Decode one compressed block.
#
# The block is handed to the Huffman coder together with empty literal
# and distance trees, and the resulting tokens are expanded with
# reconstruct_from_tokens.
#
# @param data [String] Compressed block
# @return [String] Decompressed data
def decode_block(data)
  reconstruct_from_tokens(@huffman.decode(data, {}, {}))
end
|
|
115
|
+
|
|
116
|
+
private
|
|
117
|
+
|
|
118
|
+
# Expand an LZ77 match by copying bytes out of the sliding window.
#
# Every byte is read +distance+ positions back from the current end of
# the window, appended to +output+ and pushed onto the window. Indexing
# from the end stays correct in both situations:
# * window still growing - the source position advances with each append,
#   which also covers overlapping copies (distance < length);
# * window at capacity - each append is followed by a shift, so the
#   source byte sits at the same index on every iteration.
# Indexing from a fixed start offset goes wrong in the second case,
# because the entries slide left while the offset keeps advancing.
#
# @param output [Array] Output buffer receiving one binary String per byte
# @param distance [Integer] Distance back in window (1..window length)
# @param length [Integer] Number of bytes to copy
# @raise [Omnizip::DecompressionError] if +distance+ is not positive or
#   reaches back further than the data currently held in the window
def copy_from_window(output, distance, length)
  # A zero or negative distance does not refer to any earlier byte.
  unless distance.positive?
    raise Omnizip::DecompressionError,
          "Invalid distance: #{distance} must be at least 1"
  end

  # Check if we're trying to copy from beyond the window
  if distance > @window.size
    raise Omnizip::DecompressionError,
          "Invalid distance: #{distance} exceeds window size #{@window.size}"
  end

  length.times do
    byte = @window[-distance]

    output << byte.chr(Encoding::BINARY)
    @window << byte

    # Maintain window size during copy
    @window.shift if @window.size > @window_size
  end
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "constants"
|
|
5
|
+
require_relative "lz77_encoder"
|
|
6
|
+
require_relative "huffman_coder"
|
|
7
|
+
|
|
8
|
+
module Omnizip
|
|
9
|
+
module Algorithms
|
|
10
|
+
class Deflate64
|
|
11
|
+
# Deflate64 encoder
|
|
12
|
+
class Encoder
|
|
13
|
+
include Constants
|
|
14
|
+
|
|
15
|
+
attr_reader :window_size
|
|
16
|
+
|
|
17
|
+
# Create an encoder that writes to +output_stream+.
#
# @param output_stream [#write] Destination for the compressed output
# @param options [Hash] Optional settings
# @option options [Integer] :window_size LZ77 window size
#   (falls back to DICTIONARY_SIZE)
# @option options [Integer] :level Compression level (falls back to 6);
#   stored in @compression_level, which no other method of this class reads
def initialize(output_stream, options = {})
  requested_window = options[:window_size]
  requested_level = options[:level]

  @output_stream = output_stream
  @window_size = requested_window || DICTIONARY_SIZE
  @compression_level = requested_level || 6
  @lz77_encoder = LZ77Encoder.new(@window_size)
  @huffman = HuffmanCoder.new
end
|
|
24
|
+
|
|
25
|
+
# Compress everything readable from +input_stream+ and write the
# result to the output stream in a single write.
#
# The input is read in one go, tokenised by the LZ77 encoder, Huffman
# coded, and finally framed together with both code tables by
# serialize_with_trees.
#
# @param input_stream [IO] Input data stream
# @return [Object] Whatever @output_stream.write returns
def compress(input_stream)
  # LZ77 pass over the complete input
  tokens = @lz77_encoder.find_matches(input_stream.read)

  # Huffman pass; the coder's tables are read back right after it
  payload = @huffman.encode(tokens)

  framed = serialize_with_trees(
    payload,
    @huffman.literal_tree,
    @huffman.distance_tree,
  )
  @output_stream.write(framed)
end
|
|
46
|
+
|
|
47
|
+
private
|
|
48
|
+
|
|
49
|
+
# Frame the compressed payload together with both Huffman tables.
#
# Layout: literal table size (4 bytes, big-endian), distance table size
# (4 bytes, big-endian), literal table as JSON, distance table as JSON,
# then the compressed bytes.
#
# @param compressed [String] Compressed data
# @param literal_tree [Hash] Literal Huffman tree
# @param distance_tree [Hash] Distance Huffman tree
# @return [String] Serialized output
def serialize_with_trees(compressed, literal_tree, distance_tree)
  literal_json, distance_json = [literal_tree, distance_tree].map(&:to_json)
  literal_len = literal_json.bytesize
  distance_len = distance_json.bytesize

  layout = "NNA#{literal_len}A#{distance_len}A*"
  [literal_len, distance_len, literal_json, distance_json, compressed].pack(layout)
end
|
|
69
|
+
|
|
70
|
+
# Compress one block: LZ77 tokenisation followed by Huffman coding.
#
# @param data [String] Input data
# @return [String] Compressed data
def encode_block(data)
  @huffman.encode(@lz77_encoder.find_matches(data))
end
|
|
81
|
+
|
|
82
|
+
# Compress +input_stream+ chunk by chunk, writing each encoded chunk to
# the output stream as soon as it is ready.
#
# NOTE(review): only the bytes returned by encode_block are written;
# the Huffman tables built for each chunk are not, unlike #compress --
# confirm how a reader is expected to decode this output.
#
# @param input_stream [IO] Input stream
# @param chunk_size [Integer] Size of chunks to process
# @return [nil]
def encode_stream(input_stream, chunk_size = 65_536)
  until input_stream.eof?
    piece = input_stream.read(chunk_size).to_s
    break if piece.empty?

    @output_stream.write(encode_block(piece))
  end
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
end
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "constants"
|
|
4
|
+
|
|
5
|
+
module Omnizip
|
|
6
|
+
module Algorithms
|
|
7
|
+
class Deflate64
|
|
8
|
+
# Huffman coding for Deflate64
|
|
9
|
+
class HuffmanCoder
|
|
10
|
+
include Constants
|
|
11
|
+
|
|
12
|
+
# Literal/length symbol for each match length: lengths 3..31 map
# one-to-one onto symbols 257..285 (symbol = length + 254).
LENGTH_CODES_MAP = (3..31).each_with_object({}) do |length, table|
  table[length] = length + 254
end.freeze

# The distance code numbers, 0 through 29.
DISTANCE_CODES_MAP = Array.new(30) { |code| code }.freeze
|
|
24
|
+
|
|
25
|
+
attr_reader :literal_tree, :distance_tree
|
|
26
|
+
|
|
27
|
+
# Start without code tables; #encode builds them and #decode receives
# them from the caller.
def initialize
  @literal_tree = @distance_tree = nil
end
|
|
31
|
+
|
|
32
|
+
# Huffman-code a token list.
#
# Symbol frequencies are counted first, a code table is built for the
# literal/length alphabet and another for the distance alphabet (both
# stay readable through #literal_tree / #distance_tree afterwards), and
# the tokens are then written out with those tables.
#
# @param tokens [Array<Hash>] LZ77 tokens
# @return [String] Encoded bitstream
def encode(tokens)
  literal_counts = build_literal_frequencies(tokens)
  distance_counts = build_distance_frequencies(tokens)

  @literal_tree = build_tree(literal_counts)
  @distance_tree = build_tree(distance_counts)

  encode_tokens(tokens)
end
|
|
48
|
+
|
|
49
|
+
# Decode a bitstream with caller-supplied code tables.
#
# The tables replace whatever tables this coder currently holds.
#
# @param bitstream [String] Encoded data
# @param literal_tree [Hash] Literal Huffman tree
# @param distance_tree [Hash] Distance Huffman tree
# @return [Array<Hash>] Decoded tokens
def decode(bitstream, literal_tree, distance_tree)
  @literal_tree, @distance_tree = literal_tree, distance_tree
  decode_tokens(bitstream)
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
# Count how often each literal/length symbol occurs.
#
# Literal tokens count under their byte value; every other token counts
# under the length symbol of its :length. The end-of-block symbol is
# always recorded with a count of exactly 1.
#
# @param tokens [Array<Hash>] LZ77 tokens
# @return [Hash] Frequency table (missing symbols read as 0)
def build_literal_frequencies(tokens)
  counts = tokens.each_with_object(Hash.new(0)) do |tok, tally|
    symbol = tok[:type] == :literal ? tok[:value] : length_to_code(tok[:length])
    tally[symbol] += 1
  end

  # Add end of block marker
  counts[END_OF_BLOCK] = 1
  counts
end
|
|
85
|
+
|
|
86
|
+
# Count how often each distance code occurs among the :match tokens;
# all other tokens are ignored.
#
# @param tokens [Array<Hash>] LZ77 tokens
# @return [Hash] Frequency table (missing codes read as 0)
def build_distance_frequencies(tokens)
  matches = tokens.select { |tok| tok[:type] == :match }

  matches.each_with_object(Hash.new(0)) do |tok, counts|
    counts[distance_to_code(tok[:distance])] += 1
  end
end
|
|
102
|
+
|
|
103
|
+
# Build a Huffman code table from symbol frequencies.
#
# The two least frequent nodes are merged repeatedly until one root is
# left, and the codes are then read off the tree by generate_codes.
#
# An alphabet with a single symbol is special-cased: walking a tree
# that is just one leaf would give that symbol the empty string as its
# code, i.e. zero bits per occurrence. A decoder that matches codes bit
# by bit can never recognise a zero-bit code, so the symbol is given
# the one-bit code "0" instead.
#
# @param frequencies [Hash] Symbol frequencies
# @return [Hash] Huffman code table (symbol => bit string), empty when
#   there are no symbols
def build_tree(frequencies)
  return {} if frequencies.empty?
  return { frequencies.each_key.first => "0" } if frequencies.size == 1

  # Build priority queue of nodes
  nodes = frequencies.map do |symbol, freq|
    { symbol: symbol, freq: freq, left: nil, right: nil }
  end

  # Merge the two lightest nodes until only the root remains
  while nodes.size > 1
    nodes.sort_by! { |n| n[:freq] }
    left = nodes.shift
    right = nodes.shift

    nodes << {
      symbol: nil,
      freq: left[:freq] + right[:freq],
      left: left,
      right: right,
    }
  end

  # Generate codes from tree
  generate_codes(nodes.first)
end
|
|
134
|
+
|
|
135
|
+
# Read the code of every leaf off a Huffman tree.
#
# The tree is walked depth-first, left branch before right; going left
# appends "0" to the code, going right appends "1". A node counts as a
# leaf when its :symbol is set. Results are stored in +codes+, which is
# also the return value.
#
# @param node [Hash] Tree node
# @param code [String] Current code
# @param codes [Hash] Code table
# @return [Hash] Complete code table
def generate_codes(node, code = "", codes = {})
  pending = [[node, code]]

  until pending.empty?
    current, prefix = pending.pop
    next if current.nil?

    if current[:symbol]
      codes[current[:symbol]] = prefix
    else
      # Right goes on the stack first so the left subtree is handled first
      pending.push([current[:right], "#{prefix}1"])
      pending.push([current[:left], "#{prefix}0"])
    end
  end

  codes
end
|
|
153
|
+
|
|
154
|
+
# Write the tokens out as a Huffman-coded bitstream.
#
# A literal contributes the code of its byte value. Any other token
# contributes the code of its length symbol followed by the code of its
# distance code. The end-of-block code is appended last, and the bit
# string is packed into bytes by bits_to_bytes.
#
# NOTE(review): only the Huffman codes are written -- no extra bits
# follow a length or distance code, so the position of a value inside
# the range covered by its code is not recorded.
#
# @param tokens [Array<Hash>] LZ77 tokens
# @return [String] Encoded bitstream
# @raise [TypeError] if a symbol has no entry in the current code table
def encode_tokens(tokens)
  # Unfrozen buffer grown in place; `bits += code` would copy the whole
  # string again for every token.
  bits = +""

  tokens.each do |token|
    if token[:type] == :literal
      bits << @literal_tree[token[:value]]
    else
      bits << @literal_tree[length_to_code(token[:length])]
      bits << @distance_tree[distance_to_code(token[:distance])]
    end
  end

  # Add end of block marker
  bits << @literal_tree[END_OF_BLOCK]

  # Convert bits to bytes
  bits_to_bytes(bits)
end
|
|
179
|
+
|
|
180
|
+
# Turn a Huffman-coded bitstream back into LZ77 tokens.
#
# Symbols are read with the literal table until the end-of-block symbol
# or the end of the data. A symbol below 256 is a literal byte; any
# other symbol is a length symbol and is followed by one symbol from
# the distance table.
#
# @param bitstream [String] Encoded data
# @return [Array<Hash>] Decoded tokens
# @raise [Omnizip::DecompressionError] if no code matches at the
#   current bit position
def decode_tokens(bitstream)
  bits = bytes_to_bits(bitstream)
  decoded = []
  cursor = 0

  until cursor >= bits.length
    symbol, used = decode_symbol(bits, cursor, @literal_tree)

    # Check for decoding failure
    if symbol.nil?
      raise Omnizip::DecompressionError,
            "Failed to decode symbol at bit position #{cursor}"
    end

    cursor += used
    break if symbol == END_OF_BLOCK

    if symbol < 256
      decoded << { type: :literal, value: symbol }
      next
    end

    match_length = code_to_length(symbol)
    dist_symbol, dist_used = decode_symbol(bits, cursor, @distance_tree)

    # Check for distance decoding failure
    if dist_symbol.nil?
      raise Omnizip::DecompressionError,
            "Failed to decode distance at bit position #{cursor}"
    end

    cursor += dist_used
    decoded << {
      type: :match,
      length: match_length,
      distance: code_to_distance(dist_symbol),
    }
  end

  decoded
end
|
|
230
|
+
|
|
231
|
+
# Read one symbol from the bit string.
#
# Starting at +pos+, ever longer bit prefixes are looked up in the
# inverted code table until one of them is a known code.
#
# @param bits [String] Bit string
# @param pos [Integer] Current position
# @param tree [Hash] Huffman tree
# @return [Array] Symbol and bits consumed, or [nil, 0] when no code
#   matches before the end of the bit string
def decode_symbol(bits, pos, tree)
  symbol_for = tree.invert

  (pos...bits.length).each do |last|
    candidate = bits[pos..last]
    symbol = symbol_for[candidate]
    return [symbol, candidate.length] if symbol
  end

  [nil, 0]
end
|
|
252
|
+
|
|
253
|
+
# Convert match length to Huffman code
#
# Lengths listed in LENGTH_CODES_MAP get their own symbol; every other
# length falls back to symbol 285.
def length_to_code(length)
  LENGTH_CODES_MAP.fetch(length, 285)
end
|
|
257
|
+
|
|
258
|
+
# Convert Huffman code to match length
#
# Returns the length whose LENGTH_CODES_MAP entry equals +code+, or 258
# when no entry does.
def code_to_length(code)
  entry = LENGTH_CODES_MAP.find { |_length, symbol| symbol == code }
  entry ? entry.first : 258
end
|
|
262
|
+
|
|
263
|
+
# Convert distance to Huffman code
# Uses DEFLATE distance code table
#
# Distances 1..4 have a code each (0..3). From 5 up to 32768 the range
# doubles in size every two codes: (2**k + 1)..2**(k + 1) is split into
# two equal halves with codes 2k and 2k + 1. Anything else -- including
# 32769..65536 -- gets code 29.
#
# NOTE(review): from code 4 upwards one code stands for several
# distances (5 and 6 both give 4); the offset inside the range is not
# part of the return value.
def distance_to_code(distance)
  return distance - 1 if (1..4).cover?(distance)

  (0..12).each do |pair|
    low = (1 << (pair + 2)) + 1
    high = 1 << (pair + 3)
    next unless (low..high).cover?(distance)

    half = 1 << (pair + 1)
    return 4 + (2 * pair) + ((distance - low) / half)
  end

  29 # Max distance code for 64KB window
end
|
|
301
|
+
|
|
302
|
+
# Convert Huffman code to distance
# Uses DEFLATE distance code table (base distances)
#
# Codes 0..3 give distances 1..4. Codes 4..29 come in pairs; each code
# gives the smallest distance of its half of the pair's range (code 4
# gives 5, code 5 gives 7, ..., code 29 gives 24577). Any other code
# gives 1.
def code_to_distance(code)
  return code + 1 if (0..3).cover?(code)

  (0..12).each do |pair|
    first_code = 4 + (2 * pair)
    next unless (first_code..(first_code + 1)).cover?(code)

    base = (1 << (pair + 2)) + 1
    half = 1 << (pair + 1)
    return base + ((code - first_code) * half)
  end

  1 # Default to distance 1
end
|
|
338
|
+
|
|
339
|
+
# Convert bit string to bytes
#
# The string of "0"/"1" characters is packed most significant bit
# first; when its length is not a multiple of 8 the last byte is padded
# with zero bits on the right.
#
# @param bits [String] Bit string made of "0" and "1" characters
# @return [String] Packed bytes (binary encoding)
def bits_to_bytes(bits)
  [bits].pack("B*")
end
|
|
346
|
+
|
|
347
|
+
# Convert bytes to bit string
#
# Every byte becomes eight "0"/"1" characters, most significant bit
# first.
#
# @param bytes [String] Raw bytes
# @return [String] Bit string, eight characters per input byte
def bytes_to_bits(bytes)
  bytes.unpack1("B*")
end
|
|
351
|
+
end
|
|
352
|
+
end
|
|
353
|
+
end
|
|
354
|
+
end
|