omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "bwt"
|
|
24
|
+
require_relative "mtf"
|
|
25
|
+
require_relative "rle"
|
|
26
|
+
require_relative "huffman"
|
|
27
|
+
require_relative "../../checksums/crc32"
|
|
28
|
+
|
|
29
|
+
module Omnizip
|
|
30
|
+
module Algorithms
|
|
31
|
+
class BZip2 < Algorithm
|
|
32
|
+
# BZip2 Decoder
|
|
33
|
+
#
|
|
34
|
+
# Reverses the full BZip2 compression pipeline:
|
|
35
|
+
# 1. Read block headers
|
|
36
|
+
# 2. Decode Huffman coding
|
|
37
|
+
# 3. Reverse Run-Length Encoding (RLE)
|
|
38
|
+
# 4. Reverse Move-to-Front Transform (MTF)
|
|
39
|
+
# 5. Reverse Burrows-Wheeler Transform (BWT)
|
|
40
|
+
# 6. Verify CRC32 checksum
|
|
41
|
+
#
|
|
42
|
+
# Processes each block independently and concatenates results.
|
|
43
|
+
class Decoder
|
|
44
|
+
attr_reader :input
|
|
45
|
+
|
|
46
|
+
# Build a decoder bound to +input+, with one instance of each pipeline
# stage (BWT, MTF, RLE, Huffman) ready for use by the block decoder.
#
# @param input [IO] Stream the blocks are read from
# @param _options [Hash] Accepted but not used by this method
def initialize(input, _options = {})
  # Pipeline stages, created in the same order as before.
  @bwt = Bwt.new
  @mtf = Mtf.new
  @rle = Rle.new
  @huffman = Huffman.new

  # Source stream; exposed through the class's +input+ reader.
  @input = input
end
|
|
57
|
+
|
|
58
|
+
# Decode every block available on the input and return the concatenation.
#
# Blocks are pulled one at a time from decode_block until it reports the
# end of the stream with a falsy value.
#
# @return [String] Decoded data as a binary-encoded string
def decode_stream
  blocks = []

  loop do
    decoded = decode_block
    # A falsy result means there are no more blocks to read.
    break unless decoded

    blocks.push(decoded)
  end

  joined = blocks.join
  joined.b
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
|
|
77
|
+
# Decode a single block from the input stream.
#
# Block layout as read here (integers are big-endian uint32):
#   CRC32 of the decoded data, BWT primary index, one unused word,
#   RLE length, Huffman code table, encoded length, encoded payload.
#
# @return [String, nil] Decoded block, or nil if no more blocks
# @raise [EOFError] if the stream ends in the middle of a block
# @raise [RuntimeError] if the decoded data does not match the stored CRC
def decode_block
  crc_bytes = @input.read(4)
  # Fewer than four bytes left is treated as the regular end of stream.
  return nil unless crc_bytes && crc_bytes.length == 4

  expected_crc = crc_bytes.unpack1("N")
  primary_index = read_block_uint32("primary index")
  # This header word is consumed to keep the stream aligned but its value
  # is not used by the decoder.
  # NOTE(review): presumably the original block size - confirm with encoder.
  read_block_uint32("unused header word")
  rle_length = read_block_uint32("RLE length")

  # Huffman code-length table sits between the header and the payload.
  codes = read_huffman_codes

  encoded_length = read_block_uint32("encoded length")
  encoded_data = read_block_bytes(encoded_length, "encoded data")

  tree = rebuild_huffman_tree(codes)

  # Undo the pipeline stages in reverse order: Huffman, RLE, MTF, BWT.
  rle_data = @huffman.decode(encoded_data, tree, rle_length)
  mtf_data = @rle.decode(rle_data)
  bwt_data = @mtf.decode(mtf_data)
  original_data = @bwt.decode(bwt_data, primary_index)

  # The stored CRC covers the fully decoded block.
  actual_crc = Checksums::Crc32.calculate(original_data)
  if actual_crc != expected_crc
    raise "CRC mismatch: expected #{expected_crc}, " \
          "got #{actual_crc}"
  end

  original_data
end

# Read exactly +length+ bytes of a block, failing loudly on truncation.
#
# @param length [Integer] Number of bytes required
# @param what [String] Field name used in the error message
# @return [String] The bytes read
# @raise [EOFError] if fewer than +length+ bytes are available
def read_block_bytes(length, what)
  bytes = @input.read(length)
  return bytes if bytes && bytes.bytesize == length

  got = bytes ? bytes.bytesize : 0
  raise EOFError,
        "Truncated BZip2 block: expected #{length} byte(s) of #{what}, " \
        "got #{got}"
end

# Read one big-endian unsigned 32-bit block field.
#
# @param what [String] Field name used in the error message
# @return [Integer] Decoded value
# @raise [EOFError] if the stream ends before four bytes are read
def read_block_uint32(what)
  read_block_bytes(4, what).unpack1("N")
end
|
|
121
|
+
|
|
122
|
+
# Read the Huffman code-length table from the stream.
#
# Layout: a big-endian uint16 entry count, followed by that many
# (symbol byte, code-length byte) pairs.
#
# @return [Hash<Integer, Integer>] Symbol => code length in bits
# @raise [EOFError] if the stream ends before the whole table is read
def read_huffman_codes
  count_bytes = @input.read(2)
  unless count_bytes && count_bytes.bytesize == 2
    raise EOFError, "Truncated BZip2 block: missing Huffman code count"
  end

  # Fetch all (symbol, length) pairs with a single read instead of two
  # one-byte reads per entry.
  table_size = count_bytes.unpack1("n") * 2
  table = @input.read(table_size)
  unless table && table.bytesize == table_size
    raise EOFError, "Truncated BZip2 block: incomplete Huffman code table"
  end

  # If a symbol appears more than once, the last entry wins.
  table.unpack("C*").each_slice(2).to_h
end
|
|
137
|
+
|
|
138
|
+
# Reconstruct the decoding tree from a code-length table.
#
# Symbols are ordered by (length, symbol) and assigned consecutive
# code values, left-shifting whenever the length grows; the resulting
# bit strings are then handed to build_tree_from_codes.
#
# @param code_lengths [Hash<Integer, Integer>] Symbol => length
# @return [Huffman::Node] Root of Huffman tree
def rebuild_huffman_tree(code_lengths)
  next_code = 0
  last_length = 0

  ordered = code_lengths.sort_by { |sym, len| [len, sym] }
  canonical = ordered.each_with_object({}) do |(sym, len), table|
    # Widen the running code value when moving to a longer length
    next_code <<= (len - last_length)
    table[sym] = format("%0#{len}b", next_code)
    next_code += 1
    last_length = len
  end

  build_tree_from_codes(canonical)
end
|
|
164
|
+
|
|
165
|
+
# Materialise a Huffman tree from a table of bit-string codes.
#
# Each code is walked from the root, going left on "0" and right on
# any other character, creating internal nodes on demand; the node
# reached at the end of the walk is tagged with the symbol.
#
# @param codes [Hash<Integer, String>] Symbol => binary code
# @return [Huffman::Node] Root node
def build_tree_from_codes(codes)
  root = Huffman::Node.new(nil, 0)

  codes.each do |symbol, code|
    leaf = code.each_char.reduce(root) do |node, bit|
      if bit == "0"
        node.left ||= Huffman::Node.new(nil, 0)
      else
        node.right ||= Huffman::Node.new(nil, 0)
      end
    end

    leaf.symbol = symbol
  end

  root
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "bwt"
|
|
24
|
+
require_relative "mtf"
|
|
25
|
+
require_relative "rle"
|
|
26
|
+
require_relative "huffman"
|
|
27
|
+
require_relative "../../checksums/crc32"
|
|
28
|
+
|
|
29
|
+
module Omnizip
|
|
30
|
+
module Algorithms
|
|
31
|
+
class BZip2 < Algorithm
|
|
32
|
+
# BZip2 Encoder
|
|
33
|
+
#
|
|
34
|
+
# Orchestrates the full BZip2 compression pipeline:
|
|
35
|
+
# 1. Block splitting (configurable block size)
|
|
36
|
+
# 2. Burrows-Wheeler Transform (BWT)
|
|
37
|
+
# 3. Move-to-Front Transform (MTF)
|
|
38
|
+
# 4. Run-Length Encoding (RLE)
|
|
39
|
+
# 5. Huffman Coding
|
|
40
|
+
# 6. CRC32 checksum calculation
|
|
41
|
+
#
|
|
42
|
+
# Each block is compressed independently for better parallelization
|
|
43
|
+
# potential and error recovery.
|
|
44
|
+
class Encoder
  attr_reader :output, :block_size

  # Block size constants (in bytes)
  MIN_BLOCK_SIZE = 100_000 # 100KB
  MAX_BLOCK_SIZE = 900_000 # 900KB
  DEFAULT_BLOCK_SIZE = 900_000 # 900KB (level 9)

  # Initialize encoder
  #
  # @param output [IO] Output stream
  # @param options [Hash] Encoding options
  # @option options [Integer] :block_size Block size in bytes; values
  #   outside MIN_BLOCK_SIZE..MAX_BLOCK_SIZE are clamped into range
  def initialize(output, options = {})
    @output = output
    @block_size = validate_block_size(
      options[:block_size] || DEFAULT_BLOCK_SIZE,
    )
    @bwt = Bwt.new
    @mtf = Mtf.new
    @rle = Rle.new
    @huffman = Huffman.new
  end

  # Encode stream using BZip2 algorithm
  #
  # The input is cut into blocks of at most block_size bytes and each
  # block is encoded and written to the output independently. Empty
  # input writes nothing.
  #
  # @param input [String] Input data to encode
  # @return [void]
  def encode_stream(input)
    return if input.empty?

    split_into_blocks(input).each { |block| encode_block(block) }
  end

  private

  # Validate and clamp block size to valid range
  #
  # @param size [Integer] Requested block size
  # @return [Integer] Validated block size
  def validate_block_size(size)
    size.clamp(MIN_BLOCK_SIZE, MAX_BLOCK_SIZE)
  end

  # Split input into blocks
  #
  # Slicing is done on byte offsets (not characters) so that the block
  # size limit holds for multibyte-encoded input as well; each block is
  # returned as a binary string.
  #
  # @param input [String] Input data
  # @return [Array<String>] Array of binary blocks, each at most
  #   block_size bytes long
  def split_into_blocks(input)
    (0...input.bytesize).step(@block_size).map do |offset|
      input.byteslice(offset, @block_size).b
    end
  end

  # Encode single block through full pipeline
  #
  # @param block [String] Block data
  # @return [void]
  def encode_block(block)
    # CRC is taken over the untransformed block
    crc = Checksums::Crc32.calculate(block)

    # BWT -> MTF -> RLE
    bwt_data, primary_index = @bwt.encode(block)
    mtf_data = @mtf.encode(bwt_data)
    rle_data = @rle.encode(mtf_data)

    # Huffman stage: frequencies -> tree -> canonical codes -> bits
    frequencies = build_frequency_table(rle_data)
    tree = @huffman.build_tree(frequencies)
    codes = generate_canonical_codes(tree)
    encoded_data = @huffman.encode(rle_data, codes)

    # Lengths are byte counts: Huffman coding works per byte, so the
    # RLE length is the number of symbols to decode.
    write_block(crc, primary_index, block.bytesize, codes,
                encoded_data, rle_data.bytesize)
  end

  # Build frequency table from data
  #
  # @param data [String] Input data
  # @return [Hash<Integer, Integer>] Byte => frequency
  def build_frequency_table(data)
    data.each_byte.with_object(Hash.new(0)) do |byte, freq|
      freq[byte] += 1
    end
  end

  # Generate canonical Huffman codes from tree
  #
  # Only the code lengths of the tree are kept; the bit patterns are
  # reassigned in (length, symbol) order.
  #
  # @param tree [Huffman::Node] Huffman tree root
  # @return [Hash<Integer, String>] Symbol => canonical code
  def generate_canonical_codes(tree)
    code_lengths = @huffman.generate_codes(tree)
                           .transform_values(&:length)

    # Sort by (length, symbol) to ensure deterministic ordering
    sorted_symbols = code_lengths.sort_by { |sym, len| [len, sym] }

    code_value = 0
    prev_length = 0

    sorted_symbols.each_with_object({}) do |(symbol, length), canonical|
      # Shift code value for new length
      code_value <<= (length - prev_length)
      canonical[symbol] = format("%0#{length}b", code_value)
      code_value += 1
      prev_length = length
    end
  end

  # Write encoded block to output
  #
  # @param crc [Integer] CRC32 of original block
  # @param primary_index [Integer] BWT primary index
  # @param original_length [Integer] Original block length
  # @param codes [Hash] Huffman codes
  # @param encoded_data [String] Huffman-encoded data
  # @param rle_length [Integer] Length after RLE
  # @return [void]
  def write_block(crc, primary_index, original_length, codes,
                  encoded_data, rle_length)
    write_block_header(crc, primary_index, original_length, rle_length)
    write_huffman_codes(codes)
    write_encoded_data(encoded_data)
  end

  # Write block header
  #
  # Four big-endian 32-bit integers, in parameter order.
  #
  # @param crc [Integer] CRC32 checksum
  # @param primary_index [Integer] BWT primary index
  # @param original_length [Integer] Original block length
  # @param rle_length [Integer] RLE length
  # @return [void]
  def write_block_header(crc, primary_index, original_length,
                         rle_length)
    header = [crc, primary_index, original_length, rle_length]
    @output.write(header.pack("N4"))
  end

  # Write Huffman codes to output
  #
  # Emits a 16-bit big-endian entry count followed by one
  # (symbol, code length) byte pair per entry; the bit patterns are
  # not stored.
  #
  # @param codes [Hash] Huffman codes
  # @return [void]
  def write_huffman_codes(codes)
    entries = codes.flat_map { |symbol, code| [symbol, code.length] }
    @output.write([codes.size, *entries].pack("nC*"))
  end

  # Write encoded data to output
  #
  # @param encoded_data [String] Huffman-encoded data
  # @return [void]
  def write_encoded_data(encoded_data)
    # Length prefix counts bytes, which is what the reader consumes
    @output.write([encoded_data.bytesize].pack("N"))
    @output.write(encoded_data)
  end
end
|
|
235
|
+
end
|
|
236
|
+
end
|
|
237
|
+
end
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Algorithms
|
|
25
|
+
class BZip2 < Algorithm
|
|
26
|
+
# Huffman Coding for BZip2
|
|
27
|
+
#
|
|
28
|
+
# Implements canonical Huffman coding for the final compression
|
|
29
|
+
# stage of BZip2. Huffman coding assigns variable-length codes
|
|
30
|
+
# to symbols based on their frequencies, with more frequent
|
|
31
|
+
# symbols getting shorter codes.
|
|
32
|
+
#
|
|
33
|
+
# This implementation:
|
|
34
|
+
# 1. Builds a Huffman tree from symbol frequencies
|
|
35
|
+
# 2. Generates canonical codes for efficient decoding
|
|
36
|
+
# 3. Encodes data as a bit stream
|
|
37
|
+
# 4. Decodes bit streams back to symbols
|
|
38
|
+
class Huffman
  # Huffman tree node
  class Node
    attr_accessor :symbol, :frequency, :left, :right

    # Initialize node
    #
    # @param symbol [Integer, nil] Symbol (nil for internal nodes)
    # @param frequency [Integer] Frequency/weight
    def initialize(symbol, frequency)
      @symbol = symbol
      @frequency = frequency
      @left = nil
      @right = nil
    end

    # Check if node is a leaf
    #
    # @return [Boolean] True if the node has no children
    def leaf?
      @left.nil? && @right.nil?
    end
  end

  # Build Huffman tree from frequency table
  #
  # Repeatedly merges the two lowest-frequency nodes under a new
  # parent until one node remains. A table with a single symbol
  # yields a tree whose root is that symbol's leaf.
  #
  # @param frequencies [Hash<Integer, Integer>] Symbol => frequency
  # @return [Node, nil] Root node of Huffman tree, nil for an empty table
  def build_tree(frequencies)
    return nil if frequencies.empty?

    # Create leaf nodes
    nodes = frequencies.map { |symbol, freq| Node.new(symbol, freq) }

    # Build tree bottom-up
    while nodes.length > 1
      nodes.sort_by!(&:frequency)

      # Take two lowest frequency nodes
      left = nodes.shift
      right = nodes.shift

      parent = Node.new(nil, left.frequency + right.frequency)
      parent.left = left
      parent.right = right

      nodes << parent
    end

    nodes.first
  end

  # Generate code table from Huffman tree
  #
  # @param root [Node] Root of Huffman tree
  # @return [Hash<Integer, String>] Symbol => binary code; a tree that
  #   is a single leaf maps its symbol to "0"
  def generate_codes(root)
    return {} if root.nil?

    codes = {}
    generate_codes_recursive(root, "", codes)
    codes
  end

  # Encode data using Huffman codes
  #
  # The codes of all bytes are concatenated and packed MSB-first; the
  # final byte is zero-padded.
  #
  # @param data [String] Input data to encode
  # @param codes [Hash<Integer, String>] Symbol => binary code
  # @return [String] Encoded bit stream (as binary string)
  # @raise [RuntimeError] If a byte of data has no entry in codes
  def encode(data, codes)
    return "".b if data.empty?

    bit_string = data.each_byte.map do |byte|
      codes[byte] || raise("No code for byte #{byte}")
    end.join

    bits_to_bytes(bit_string)
  end

  # Decode bit stream using Huffman tree
  #
  # Decoding stops once length symbols have been produced or the bits
  # run out, whichever comes first, so trailing padding is ignored.
  #
  # @param bits [String] Encoded bit stream
  # @param root [Node] Root of Huffman tree
  # @param length [Integer] Expected output length
  # @return [String] Decoded data
  # @raise [RuntimeError] If the bits follow a path that does not exist
  #   in the tree
  def decode(bits, root, length)
    return "".b if bits.empty? || root.nil?

    bit_string = bytes_to_bits(bits)

    # A single-leaf tree has no branches to follow: generate_codes gives
    # its symbol the one-bit code "0", so each available bit is one symbol.
    if root.leaf?
      count = length.clamp(0, bit_string.length)
      return ([root.symbol] * count).pack("C*")
    end

    result = []
    current = root

    bit_string.each_char do |bit|
      break if result.length >= length

      # Navigate tree
      current = (bit == "0" ? current.left : current.right)
      raise "Invalid Huffman bit stream: no such code" if current.nil?

      # Emit the symbol and restart from the root at each leaf
      if current.leaf?
        result << current.symbol
        current = root
      end
    end

    result.pack("C*")
  end

  private

  # Recursively generate codes for all symbols
  #
  # @param node [Node] Current node
  # @param code [String] Current code path
  # @param codes [Hash] Code table being built
  # @return [void]
  def generate_codes_recursive(node, code, codes)
    return if node.nil?

    if node.leaf?
      # A root that is itself a leaf would otherwise get an empty code
      codes[node.symbol] = code.empty? ? "0" : code
    else
      generate_codes_recursive(node.left, "#{code}0", codes)
      generate_codes_recursive(node.right, "#{code}1", codes)
    end
  end

  # Convert bit string to bytes
  #
  # @param bits [String] Bit string (e.g., "10110101")
  # @return [String] Byte string, MSB-first, last byte zero-padded
  def bits_to_bytes(bits)
    [bits].pack("B*")
  end

  # Convert bytes to bit string
  #
  # @param bytes [String] Byte string
  # @return [String] Bit string, eight MSB-first characters per byte
  def bytes_to_bits(bytes)
    bytes.unpack1("B*")
  end
end
|
|
204
|
+
end
|
|
205
|
+
end
|
|
206
|
+
end
|