omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Algorithms
|
|
25
|
+
class BZip2 < Algorithm
|
|
26
|
+
# Move-to-Front (MTF) Transform
|
|
27
|
+
#
|
|
28
|
+
# MTF is a data transformation that exploits locality of reference.
|
|
29
|
+
# It maintains a list of symbols and moves accessed symbols to the
|
|
30
|
+
# front of the list. This tends to concentrate frequently accessed
|
|
31
|
+
# symbols at low indices, making the data more compressible.
|
|
32
|
+
#
|
|
33
|
+
# After BWT, the data often has runs of the same character. MTF
|
|
34
|
+
# converts these to runs of low numbers (often 0), which are then
|
|
35
|
+
# efficiently compressed by RLE.
|
|
36
|
+
#
|
|
37
|
+
# The algorithm:
|
|
38
|
+
# 1. Initialize symbol list [0, 1, 2, ..., 255]
|
|
39
|
+
# 2. For each byte in input:
|
|
40
|
+
# - Find its position in the symbol list
|
|
41
|
+
# - Output that position
|
|
42
|
+
# - Move the byte to the front of the list
|
|
43
|
+
class Mtf
  # Encode data using the Move-to-Front transform.
  #
  # Every input byte is replaced by its current position in the symbol
  # list, and that byte is then moved to the front of the list, so
  # recently seen bytes map to small indices.
  #
  # @param data [String] Input data to transform
  # @return [String] MTF-encoded data (one index byte per input byte),
  #   returned as a binary string
  def encode(data)
    return "".b if data.empty?

    symbols = init_symbol_list
    indices = data.each_byte.map do |byte|
      # Every byte value 0..255 is always present in the list, so
      # the lookup cannot return nil.
      index = symbols.index(byte)
      move_to_front(symbols, index)
      index
    end

    indices.pack("C*")
  end

  # Decode MTF-encoded data.
  #
  # Every input byte is treated as an index into the symbol list; the
  # symbol found there is emitted and moved to the front, mirroring
  # the list updates performed by #encode.
  #
  # @param data [String] MTF-encoded indices
  # @return [String] Original data, returned as a binary string
  def decode(data)
    return "".b if data.empty?

    symbols = init_symbol_list
    bytes = data.each_byte.map { |index| move_to_front(symbols, index) }

    bytes.pack("C*")
  end

  private

  # Initialize symbol list with all possible byte values
  #
  # @return [Array<Integer>] Symbol list [0, 1, 2, ..., 255]
  def init_symbol_list
    (0..255).to_a
  end

  # Move the symbol stored at +index+ to the front of +symbols+.
  # The list is modified in place.
  #
  # @param symbols [Array<Integer>] Current symbol list
  # @param index [Integer] Position of the symbol to promote
  # @return [Integer] The symbol that now sits at the front
  def move_to_front(symbols, index)
    # A symbol at index 0 is already at the front; skipping the
    # delete/unshift pair avoids shifting the array for nothing.
    return symbols.first if index.zero?

    symbol = symbols.delete_at(index)
    symbols.unshift(symbol)
    symbol
  end
end
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Algorithms
|
|
25
|
+
class BZip2 < Algorithm
|
|
26
|
+
# Run-Length Encoding (RLE) for BZip2
#
# This is a BZip2-specific RLE variant that encodes runs of
# identical bytes. After MTF, the data often contains long runs
# of zeros, which RLE compresses efficiently.
#
# BZip2 RLE encoding scheme:
# - Runs of 4-259 identical bytes are encoded as:
#   [byte, byte, byte, byte, count-4]
# - Where the trailing value is 0-255, representing 4-259 repetitions
# - Runs < 4 are left unencoded
# - Longer runs are split into several encoded runs
# - Four identical bytes are always followed by a count byte, which
#   avoids ambiguity in decoding
#
# All positions are byte offsets (String#getbyte / String#bytesize), so
# strings in any encoding are processed in full.
class Rle
  # Maximum run length (4 + 255)
  MAX_RUN_LENGTH = 259

  # Minimum run length for encoding
  MIN_RUN_LENGTH = 4

  # Encode data using BZip2 RLE
  #
  # @param data [String] Input data to encode
  # @return [String] RLE-encoded data (binary string)
  def encode(data)
    return "".b if data.empty?

    result = []
    # bytesize, not length: getbyte indexes bytes, and length counts
    # characters, which is smaller for multibyte-encoded strings.
    size = data.bytesize
    i = 0

    while i < size
      byte = data.getbyte(i)
      run_length = count_run(data, i)

      if run_length >= MIN_RUN_LENGTH
        # Encode run: emit 4 copies + extra count
        extra = [run_length - MIN_RUN_LENGTH, 255].min
        MIN_RUN_LENGTH.times { result << byte }
        result << extra
        i += MIN_RUN_LENGTH + extra
      else
        # Short run (1-3 bytes): copy it through verbatim
        run_length.times { result << byte }
        i += run_length
      end
    end

    result.pack("C*")
  end

  # Decode RLE-encoded data
  #
  # A group of four identical bytes with no count byte after it (input
  # ends there) is kept as-is and decoding stops.
  #
  # @param data [String] RLE-encoded data
  # @return [String] Decoded data (binary string)
  def decode(data)
    return "".b if data.empty?

    result = []
    size = data.bytesize
    i = 0
    skip_count = 0

    while i < size
      byte = data.getbyte(i)
      result << byte
      i += 1

      # Decrement skip counter if active
      if skip_count.positive?
        skip_count -= 1
        next
      end

      # Check for run encoding (4 consecutive identical bytes)
      next unless i >= MIN_RUN_LENGTH &&
                  consecutive_match?(result, byte, MIN_RUN_LENGTH)

      # Read run count
      break if i >= size

      count = data.getbyte(i)
      i += 1

      # Emit additional copies
      count.times { result << byte }

      # The expanded copies must not be mistaken for the start of a new
      # run, so the next 3 literal bytes are not checked (4 literals are
      # needed to form a run).
      skip_count = MIN_RUN_LENGTH - 1
    end

    result.pack("C*")
  end

  private

  # Count run length starting at given position
  #
  # The count is capped at MAX_RUN_LENGTH and at the end of the data.
  #
  # @param data [String] Input data
  # @param start [Integer] Starting byte offset
  # @return [Integer] Run length (1..MAX_RUN_LENGTH)
  def count_run(data, start)
    byte = data.getbyte(start)
    limit = [start + MAX_RUN_LENGTH, data.bytesize].min
    finish = start + 1
    finish += 1 while finish < limit && data.getbyte(finish) == byte

    finish - start
  end

  # Check if last n bytes in array match the given byte
  #
  # @param array [Array<Integer>] Byte array
  # @param byte [Integer] Byte to match
  # @param count [Integer] Number of bytes to check
  # @return [Boolean] True if last n bytes match
  def consecutive_match?(array, byte, count)
    return false if array.length < count

    array[-count..].all?(byte)
  end
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../algorithm"
|
|
24
|
+
require_relative "../models/algorithm_metadata"
|
|
25
|
+
|
|
26
|
+
module Omnizip
|
|
27
|
+
module Algorithms
|
|
28
|
+
# BZip2 block-sorting compression algorithm
#
# BZip2 chains several compression techniques into one pipeline:
# 1. Burrows-Wheeler Transform (BWT) - block-sorting transformation
# 2. Move-to-Front Transform (MTF) - exploits locality
# 3. Run-Length Encoding (RLE) - compresses repeated bytes
# 4. Huffman Coding - variable-length entropy encoding
#
# It is particularly effective for:
# - Text files with repetitive patterns
# - Data with high local similarity
# - Files where block-sorting improves compressibility
#
# Block size affects both compression ratio and memory usage.
# Larger blocks (up to 900KB) generally provide better compression
# but require more memory.
class BZip2 < Algorithm
  # Describe this algorithm (name, description, version).
  #
  # @return [AlgorithmMetadata] Algorithm information
  def self.metadata
    meta = Models::AlgorithmMetadata.new
    meta.name = "bzip2"
    meta.description = "BZip2 block-sorting compression using " \
                       "BWT, MTF, RLE, and Huffman coding"
    meta.version = "1.0.0"
    meta
  end

  # Compress data using BZip2 algorithm
  #
  # The whole input stream is read into memory before encoding starts.
  #
  # @param input_stream [IO] Input stream to compress
  # @param output_stream [IO] Output stream for compressed data
  # @param options [Models::CompressionOptions] Compression options
  # @return [void]
  def compress(input_stream, output_stream, options = nil)
    payload = input_stream.read
    encoder_options = build_encoder_options(options)
    Encoder.new(output_stream, encoder_options).encode_stream(payload)
  end

  # Decompress BZip2-compressed data
  #
  # The output stream is switched to binary encoding when it supports
  # #set_encoding, then the decoded payload is written in one call.
  #
  # @param input_stream [IO] Input stream of compressed data
  # @param output_stream [IO] Output stream for decompressed data
  # @param options [Models::CompressionOptions] Decompression options
  #   (currently unused)
  # @return [void]
  def decompress(input_stream, output_stream, _options = nil)
    encodable = output_stream.respond_to?(:set_encoding)
    output_stream.set_encoding(Encoding::BINARY) if encodable

    payload = Decoder.new(input_stream).decode_stream
    output_stream.write(payload)
  end

  private

  # Build encoder options from compression options
  #
  # Only the compression level is consulted; a nil level falls back
  # to 9. Options objects without a #level reader yield an empty hash.
  #
  # @param options [Models::CompressionOptions, nil] Compression opts
  # @return [Hash] Encoder options
  def build_encoder_options(options)
    return {} if options.nil?
    return {} unless options.respond_to?(:level)

    { block_size: block_size_for_level(options.level || 9) }
  end

  # Get block size based on compression level
  #
  # BZip2 traditionally uses levels 1-9 corresponding to
  # 100KB-900KB block sizes. Levels outside 1-9 are clamped.
  #
  # @param level [Integer] Compression level (1-9)
  # @return [Integer] Block size in bytes
  def block_size_for_level(level)
    level.clamp(1, 9) * 100_000
  end
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# Load nested classes after BZip2 class is defined
|
|
122
|
+
require_relative "bzip2/bwt"
|
|
123
|
+
require_relative "bzip2/mtf"
|
|
124
|
+
require_relative "bzip2/rle"
|
|
125
|
+
require_relative "bzip2/huffman"
|
|
126
|
+
require_relative "bzip2/encoder"
|
|
127
|
+
require_relative "bzip2/decoder"
|
|
128
|
+
|
|
129
|
+
# Register the BZip2 algorithm
|
|
130
|
+
Omnizip::AlgorithmRegistry.register(:bzip2, Omnizip::Algorithms::BZip2)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "zlib"
|
|
4
|
+
|
|
5
|
+
module Omnizip
|
|
6
|
+
module Algorithms
|
|
7
|
+
class Deflate
|
|
8
|
+
# Constants for Deflate algorithm
#
# Levels and strategies are aliases of the corresponding Zlib constants.
module Constants
  # --- Compression levels ---
  DEFAULT_COMPRESSION = Zlib::DEFAULT_COMPRESSION
  NO_COMPRESSION = Zlib::NO_COMPRESSION
  BEST_SPEED = Zlib::BEST_SPEED
  BEST_COMPRESSION = Zlib::BEST_COMPRESSION

  # --- Compression strategies ---
  DEFAULT_STRATEGY = Zlib::DEFAULT_STRATEGY
  FILTERED = Zlib::FILTERED
  HUFFMAN_ONLY = Zlib::HUFFMAN_ONLY
  RLE = Zlib::RLE
  FIXED = Zlib::FIXED

  # Buffer size for streaming operations (32KB)
  BUFFER_SIZE = 32_768
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "zlib"
|
|
4
|
+
require_relative "constants"
|
|
5
|
+
|
|
6
|
+
module Omnizip
|
|
7
|
+
module Algorithms
|
|
8
|
+
class Deflate
|
|
9
|
+
# Deflate decoder using Zlib
#
# This class wraps Ruby's Zlib::Inflate to provide Deflate
# decompression following the established Omnizip architecture.
#
# NOTE(review): Zlib::Inflate.new is created with its default window
# bits, which presumably means a zlib-wrapped stream is expected rather
# than a raw RFC 1951 stream - confirm against the callers.
class Decoder
  include Constants

  attr_reader :input_stream

  # Initialize decoder
  #
  # @param input_stream [IO] Input stream of compressed data
  def initialize(input_stream)
    @input_stream = input_stream
  end

  # Decode compressed data stream
  #
  # Reads the whole input stream into memory and inflates it in one
  # call. The inflater is always closed, including when inflating
  # raises on malformed input.
  #
  # @return [String] Decompressed data
  # @raise [Zlib::Error] If Zlib rejects the compressed data
  def decode_stream
    compressed = @input_stream.read
    inflater = Zlib::Inflate.new

    begin
      inflater.inflate(compressed)
    ensure
      # Release the native zlib stream even when inflate raised
      inflater.close
    end
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "zlib"
|
|
4
|
+
require_relative "constants"
|
|
5
|
+
|
|
6
|
+
module Omnizip
|
|
7
|
+
module Algorithms
|
|
8
|
+
class Deflate
|
|
9
|
+
# Deflate encoder using Zlib
#
# This class wraps Ruby's Zlib::Deflate to provide Deflate
# compression following the established Omnizip architecture.
class Encoder
  include Constants

  attr_reader :output_stream, :options

  # Initialize encoder
  #
  # @param output_stream [IO] Output stream for compressed data
  # @param options [Hash] Encoder options
  # @option options [Integer] :level Compression level (0-9);
  #   defaults to DEFAULT_COMPRESSION
  # @option options [Integer] :strategy Compression strategy;
  #   defaults to DEFAULT_STRATEGY
  # @option options [Integer] :window_bits Window size (8-15);
  #   defaults to 15
  def initialize(output_stream, options = {})
    @output_stream = output_stream
    @options = options
    @level = options[:level] || DEFAULT_COMPRESSION
    @strategy = options[:strategy] || DEFAULT_STRATEGY
    @window_bits = options[:window_bits] || 15
  end

  # Encode data stream
  #
  # Compresses the data in a single call and writes the result to the
  # output stream. The deflater is always closed, including when
  # compression raises.
  #
  # @param data [String] Data to compress
  # @return [void]
  def encode_stream(data)
    # Third argument is zlib's mem_level, fixed at 9 here
    deflater = Zlib::Deflate.new(@level, @window_bits, 9, @strategy)

    compressed =
      begin
        deflater.deflate(data, Zlib::FINISH)
      ensure
        # Release the native zlib stream even when deflate raised
        deflater.close
      end

    @output_stream.write(compressed)
  end
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../algorithm"
|
|
24
|
+
require_relative "../models/algorithm_metadata"
|
|
25
|
+
|
|
26
|
+
module Omnizip
|
|
27
|
+
module Algorithms
|
|
28
|
+
# Deflate compression algorithm (RFC 1951)
#
# Deflate is a widely-used lossless data compression algorithm that
# combines LZ77 compression with Huffman coding. It is the foundation
# of many popular formats including ZIP, gzip, and PNG.
#
# The algorithm works in two phases:
# 1. LZ77 compression - Identifies repeated byte sequences
# 2. Huffman coding - Encodes the result using variable-length codes
#
# This implementation uses Ruby's Zlib library which provides a
# well-tested, efficient implementation of the Deflate algorithm.
#
# Deflate is particularly effective for:
# - Text files and source code
# - HTML, XML, and JSON documents
# - Files with repeated patterns
# - General-purpose compression needs
class Deflate < Algorithm
  # Describe this algorithm (name, description, version).
  #
  # @return [AlgorithmMetadata] Algorithm information
  def self.metadata
    meta = Models::AlgorithmMetadata.new
    meta.name = "deflate"
    meta.description = "Deflate compression using LZ77 and " \
                       "Huffman coding (RFC 1951)"
    meta.version = "1.0.0"
    meta
  end

  # Compress data using Deflate algorithm
  #
  # The whole input stream is read into memory before encoding starts.
  #
  # @param input_stream [IO] Input stream to compress
  # @param output_stream [IO] Output stream for compressed data
  # @param options [Models::CompressionOptions] Compression options
  # @return [void]
  def compress(input_stream, output_stream, options = nil)
    payload = input_stream.read
    encoder_options = build_encoder_options(options)
    Encoder.new(output_stream, encoder_options).encode_stream(payload)
  end

  # Decompress Deflate-compressed data
  #
  # The output stream is switched to binary encoding when it supports
  # #set_encoding, then the decoded payload is written in one call.
  #
  # @param input_stream [IO] Input stream of compressed data
  # @param output_stream [IO] Output stream for decompressed data
  # @param options [Models::CompressionOptions] Decompression options
  #   (currently unused)
  # @return [void]
  def decompress(input_stream, output_stream, _options = nil)
    encodable = output_stream.respond_to?(:set_encoding)
    output_stream.set_encoding(Encoding::BINARY) if encodable

    payload = Decoder.new(input_stream).decode_stream
    output_stream.write(payload)
  end

  private

  # Build encoder options from compression options
  #
  # Only the compression level is consulted. Options objects without
  # a #level reader yield an empty hash.
  #
  # @param options [Models::CompressionOptions, nil] Compression opts
  # @return [Hash] Encoder options
  def build_encoder_options(options)
    return {} if options.nil?
    return {} unless options.respond_to?(:level)

    { level: map_compression_level(options.level) }
  end

  # Map generic compression level (0-9) to Zlib level
  #
  # A nil level selects Zlib's default; 0, 1 and 9 map to the named
  # Zlib constants; any other value is passed through unchanged.
  #
  # @param level [Integer, nil] Compression level (0-9)
  # @return [Integer] Zlib compression level
  def map_compression_level(level)
    case level
    when nil
      Zlib::DEFAULT_COMPRESSION
    when 0
      Zlib::NO_COMPRESSION
    when 1
      Zlib::BEST_SPEED
    when 9
      Zlib::BEST_COMPRESSION
    else
      level
    end
  end
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Load nested classes after Deflate class is defined
|
|
123
|
+
require_relative "deflate/constants"
|
|
124
|
+
require_relative "deflate/encoder"
|
|
125
|
+
require_relative "deflate/decoder"
|
|
126
|
+
|
|
127
|
+
# Register the Deflate algorithm
|
|
128
|
+
Omnizip::AlgorithmRegistry.register(:deflate, Omnizip::Algorithms::Deflate)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Algorithms
|
|
5
|
+
class Deflate64
|
|
6
|
+
# Constants for Deflate64 (Enhanced Deflate) algorithm
#
# NOTE(review): Deflate64 is usually described as using 32 distance
# codes and match lengths beyond 258; confirm that DISTANCE_CODES,
# MAX_MATCH_LENGTH and MAX_DISTANCE below are intentional here.
module Constants
  # Dictionary/window size - 64KB vs 32KB in standard Deflate
  DICTIONARY_SIZE = 64 * 1024

  # Match length constraints
  MIN_MATCH_LENGTH = 3
  MAX_MATCH_LENGTH = 258
  MAX_DISTANCE = DICTIONARY_SIZE - 1

  # Huffman coding constants
  LITERAL_CODES = 286
  DISTANCE_CODES = 30
  LENGTH_CODES = 19

  # Maximum code lengths
  MAX_LITERAL_CODE_LENGTH = 15
  MAX_DISTANCE_CODE_LENGTH = 15

  # Block types
  BLOCK_TYPE_STORED = 0
  BLOCK_TYPE_FIXED = 1
  BLOCK_TYPE_DYNAMIC = 2

  # End of block marker
  END_OF_BLOCK = 256

  # Hash table size for LZ77
  HASH_SIZE = 1 << 16
  HASH_SHIFT = 5

  # Search limits
  MAX_CHAIN_LENGTH = 4096
  GOOD_MATCH = 32
  NICE_MATCH = 258
  MAX_LAZY_MATCH = 258
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|