omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
data/Rakefile
ADDED
data/benchmark/README.md
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# Omnizip Benchmark Suite
|
|
2
|
+
|
|
3
|
+
Comprehensive benchmark suite comparing omnizip performance against native
|
|
4
|
+
7-Zip.
|
|
5
|
+
|
|
6
|
+
## Purpose
|
|
7
|
+
|
|
8
|
+
This benchmark suite provides:
|
|
9
|
+
|
|
10
|
+
* Performance comparison between omnizip (Ruby) and 7-Zip (C)
|
|
11
|
+
* Compression ratio analysis for each algorithm
|
|
12
|
+
* Filter effectiveness measurements
|
|
13
|
+
* Baseline for future optimization work
|
|
14
|
+
|
|
15
|
+
## Requirements
|
|
16
|
+
|
|
17
|
+
* Ruby 2.7 or higher
|
|
18
|
+
* 7-Zip CLI tool (`7z` or `7za`) installed (optional but recommended)
|
|
19
|
+
* Omnizip gem dependencies installed (`bundle install`)
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
Install 7-Zip for comparisons:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# macOS
|
|
27
|
+
brew install p7zip
|
|
28
|
+
|
|
29
|
+
# Ubuntu/Debian
|
|
30
|
+
sudo apt-get install p7zip-full
|
|
31
|
+
|
|
32
|
+
# Windows
|
|
33
|
+
# Download from https://www.7-zip.org/
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Running Benchmarks
|
|
37
|
+
|
|
38
|
+
### Quick Start
|
|
39
|
+
|
|
40
|
+
Run all benchmarks:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
ruby benchmark/run_benchmarks.rb
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Run quick benchmarks (1 size, 1 data type):
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
ruby benchmark/run_benchmarks.rb --quick
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Options
|
|
53
|
+
|
|
54
|
+
* `-v`, `--verbose` - Enable verbose output
|
|
55
|
+
* `-q`, `--quick` - Run quick benchmarks (faster, less coverage)
|
|
56
|
+
* `--compression-only` - Run only compression algorithm benchmarks
|
|
57
|
+
* `--filters-only` - Run only filter benchmarks
|
|
58
|
+
* `--output=FILE` - Save results to JSON file
|
|
59
|
+
* `-h`, `--help` - Show help message
|
|
60
|
+
|
|
61
|
+
### Examples
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# Verbose output with JSON results
|
|
65
|
+
ruby benchmark/run_benchmarks.rb --verbose --output=results.json
|
|
66
|
+
|
|
67
|
+
# Quick compression-only benchmark
|
|
68
|
+
ruby benchmark/run_benchmarks.rb --quick --compression-only
|
|
69
|
+
|
|
70
|
+
# Full benchmark with results saved
|
|
71
|
+
ruby benchmark/run_benchmarks.rb --output=benchmark/results/full.json
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## What Gets Benchmarked
|
|
75
|
+
|
|
76
|
+
### Compression Algorithms
|
|
77
|
+
|
|
78
|
+
* **LZMA** - Lempel-Ziv-Markov chain algorithm
|
|
79
|
+
* **LZMA2** - Improved LZMA with better multithreading
|
|
80
|
+
* **PPMd7** - Prediction by partial matching
|
|
81
|
+
* **BZip2** - Burrows-Wheeler transform compression
|
|
82
|
+
|
|
83
|
+
### Filters
|
|
84
|
+
|
|
85
|
+
* **BCJ x86** - Branch/Call/Jump filter for x86 executables
|
|
86
|
+
* **Delta** - Delta encoding for gradually changing data
|
|
87
|
+
|
|
88
|
+
### Data Types
|
|
89
|
+
|
|
90
|
+
* **Text** - Lorem ipsum text data
|
|
91
|
+
* **Source Code** - Ruby source code
|
|
92
|
+
* **Repetitive** - Highly compressible repetitive patterns
|
|
93
|
+
* **Random** - Incompressible random data
|
|
94
|
+
|
|
95
|
+
### Test Sizes
|
|
96
|
+
|
|
97
|
+
* 1KB (1,024 bytes)
|
|
98
|
+
* 10KB (10,240 bytes)
|
|
99
|
+
* 100KB (102,400 bytes)
|
|
100
|
+
|
|
101
|
+
## Interpreting Results
|
|
102
|
+
|
|
103
|
+
### Compression Ratio
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
Compression Ratio = Original Size / Compressed Size
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Higher is better. Example: 3.0x means the data was compressed to 1/3 of its original size.
|
|
110
|
+
|
|
111
|
+
### Size Difference
|
|
112
|
+
|
|
113
|
+
Shows how much larger/smaller omnizip output is compared to 7-Zip:
|
|
114
|
+
|
|
115
|
+
* Positive % = omnizip produces larger files
|
|
116
|
+
* Negative % = omnizip produces smaller files (better)
|
|
117
|
+
|
|
118
|
+
Expect omnizip output to be within 10-20% of the 7-Zip output size.
|
|
119
|
+
|
|
120
|
+
### Speed Ratio
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
Speed Ratio = Omnizip Time / 7-Zip Time
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Shows how many times slower omnizip is compared to 7-Zip.
|
|
127
|
+
|
|
128
|
+
* Expected: 5-20x slower (normal for Ruby compared with C)
|
|
129
|
+
* < 10x = Good performance for Ruby implementation
|
|
130
|
+
* > 20x = May need optimization
|
|
131
|
+
|
|
132
|
+
## Expected Performance Characteristics
|
|
133
|
+
|
|
134
|
+
### Compression Ratios
|
|
135
|
+
|
|
136
|
+
Omnizip should achieve similar compression ratios to 7-Zip (within 10-20%)
|
|
137
|
+
because both implement the same algorithms. Differences come from:
|
|
138
|
+
|
|
139
|
+
* Parameter tuning differences
|
|
140
|
+
* Implementation details
|
|
141
|
+
* Ruby vs C precision differences
|
|
142
|
+
|
|
143
|
+
### Speed
|
|
144
|
+
|
|
145
|
+
Ruby implementations are typically 5-20x slower than C implementations:
|
|
146
|
+
|
|
147
|
+
* **5-10x slower** = Excellent for Ruby
|
|
148
|
+
* **10-15x slower** = Good for Ruby
|
|
149
|
+
* **15-20x slower** = Acceptable for Ruby
|
|
150
|
+
* **> 20x slower** = May indicate optimization opportunities
|
|
151
|
+
|
|
152
|
+
### Algorithm-Specific Notes
|
|
153
|
+
|
|
154
|
+
* **LZMA/LZMA2**: Most complex, expect larger speed differences
|
|
155
|
+
* **BZip2**: Simpler algorithm, may have better speed ratios
|
|
156
|
+
* **PPMd7**: Memory-intensive, speed depends on implementation details
|
|
157
|
+
|
|
158
|
+
## Architecture
|
|
159
|
+
|
|
160
|
+
The benchmark suite follows an object-oriented architecture:
|
|
161
|
+
|
|
162
|
+
```
|
|
163
|
+
benchmark/
|
|
164
|
+
├── models/ # Data models
|
|
165
|
+
│ ├── benchmark_result.rb # Single benchmark result
|
|
166
|
+
│ └── comparison_result.rb # Omnizip vs 7-Zip comparison
|
|
167
|
+
├── test_data.rb # Test data generator
|
|
168
|
+
├── compression_bench.rb # Algorithm benchmarks
|
|
169
|
+
├── filter_bench.rb # Filter benchmarks
|
|
170
|
+
├── benchmark_suite.rb # Main orchestrator
|
|
171
|
+
├── reporter.rb # Results formatting
|
|
172
|
+
└── run_benchmarks.rb # Executable runner
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## Output Format
|
|
176
|
+
|
|
177
|
+
### Console Output
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
================================================================================
|
|
181
|
+
OMNIZIP vs 7-ZIP BENCHMARK RESULTS
|
|
182
|
+
================================================================================
|
|
183
|
+
|
|
184
|
+
--------------------------------------------------------------------------------
|
|
185
|
+
Test: lzma_text
|
|
186
|
+
--------------------------------------------------------------------------------
|
|
187
|
+
Metric Omnizip 7-Zip
|
|
188
|
+
--------------------------------------------------------------------------------
|
|
189
|
+
Input Size 10.0KB 10.0KB
|
|
190
|
+
Compressed Size 3.5KB 3.2KB
|
|
191
|
+
Compression Ratio 2.86 3.13
|
|
192
|
+
Compression Time 2.500s 0.150s
|
|
193
|
+
|
|
194
|
+
--------------------------------------------------------------------------------
|
|
195
|
+
Comparison:
|
|
196
|
+
--------------------------------------------------------------------------------
|
|
197
|
+
Size difference: +300 bytes (+9.4%)
|
|
198
|
+
Speed ratio: 16.7x slower
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### JSON Output
|
|
202
|
+
|
|
203
|
+
```json
|
|
204
|
+
{
|
|
205
|
+
"timestamp": "2025-10-26T12:00:00Z",
|
|
206
|
+
"results": [
|
|
207
|
+
{
|
|
208
|
+
"test_name": "lzma_text",
|
|
209
|
+
"omnizip": {
|
|
210
|
+
"algorithm": "lzma",
|
|
211
|
+
"input_size": 10240,
|
|
212
|
+
"compressed_size": 3584,
|
|
213
|
+
"compression_ratio": 2.86,
|
|
214
|
+
"compression_time": 2.5
|
|
215
|
+
},
|
|
216
|
+
"seven_zip": { ... },
|
|
217
|
+
"comparison": {
|
|
218
|
+
"size_difference_bytes": 300,
|
|
219
|
+
"size_difference_percentage": 9.4,
|
|
220
|
+
"compression_speed_ratio": 16.7
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
]
|
|
224
|
+
}
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## Troubleshooting
|
|
228
|
+
|
|
229
|
+
### 7-Zip Not Found
|
|
230
|
+
|
|
231
|
+
If 7-Zip is not installed, benchmarks will still run but comparisons will show
|
|
232
|
+
"7-Zip not available". Install 7-Zip for full comparisons.
|
|
233
|
+
|
|
234
|
+
### Slow Benchmarks
|
|
235
|
+
|
|
236
|
+
Use `--quick` flag for faster results with less coverage, or run specific
|
|
237
|
+
benchmark types with `--compression-only` or `--filters-only`.
|
|
238
|
+
|
|
239
|
+
### Memory Issues
|
|
240
|
+
|
|
241
|
+
Large test files (100KB+) with complex algorithms may use significant memory.
|
|
242
|
+
Reduce test sizes in `benchmark_suite.rb` if needed.
|
|
243
|
+
|
|
244
|
+
## Future Enhancements
|
|
245
|
+
|
|
246
|
+
* Add decompression benchmarks
|
|
247
|
+
* Test larger file sizes (1MB, 10MB)
|
|
248
|
+
* Add multi-threaded benchmarks
|
|
249
|
+
* Compare memory usage
|
|
250
|
+
* Add visualization/charts
|
|
251
|
+
|
|
252
|
+
## Contributing
|
|
253
|
+
|
|
254
|
+
When adding new benchmarks:
|
|
255
|
+
|
|
256
|
+
1. Follow object-oriented design patterns
|
|
257
|
+
2. Use model classes for data representation
|
|
258
|
+
3. Maintain separation of concerns
|
|
259
|
+
4. Add documentation for new features
|
|
260
|
+
5. Run `bundle exec rubocop` before committing
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "test_data"
|
|
4
|
+
require_relative "compression_bench"
|
|
5
|
+
require_relative "filter_bench"
|
|
6
|
+
require_relative "reporter"
|
|
7
|
+
|
|
8
|
+
module Benchmark
  # Main orchestrator for running all benchmarks.
  #
  # A run generates the input files through TestData, feeds them to
  # CompressionBench and/or FilterBench, appends every returned result to
  # {#results} and finally removes the generated files again. Cleanup is
  # performed in an +ensure+ clause, so the files are removed even when a
  # benchmark raises.
  class BenchmarkSuite
    TEST_SIZES = [1024, 10_240, 102_400].freeze # 1KB, 10KB, 100KB
    DATA_TYPES = %w[text source_code repetitive random].freeze

    attr_reader :verbose, :results

    # @param verbose [Boolean] print progress messages while running
    # @param quick [Boolean] restrict the run to the first test size, the
    #   first data type and the "lzma" algorithm, and skip filter benchmarks
    def initialize(verbose: false, quick: false)
      @verbose = verbose
      @quick = quick
      @test_data = TestData.new
      @compression_bench = CompressionBench.new(verbose: verbose)
      @filter_bench = FilterBench.new(verbose: verbose)
      @results = []
    end

    # Runs the compression benchmarks followed by the filter benchmarks.
    #
    # @return [BenchmarkSuite] self, so calls can be chained
    def run_all
      puts "Starting Omnizip vs 7-Zip benchmark suite..."
      puts "7-Zip available: #{@compression_bench.seven_zip_available?}"

      with_test_data do
        run_compression_benchmarks
        run_filter_benchmarks
      end
    end

    # Runs only the compression benchmarks.
    #
    # @return [BenchmarkSuite] self
    def run_compression_only
      puts "Running compression benchmarks only..."
      with_test_data { run_compression_benchmarks }
    end

    # Runs only the filter benchmarks (a no-op in quick mode).
    #
    # @return [BenchmarkSuite] self
    def run_filters_only
      puts "Running filter benchmarks only..."
      with_test_data { run_filter_benchmarks }
    end

    # Prints a summary of the collected results through Reporter.
    def report
      Reporter.new(@results).print_summary
    end

    # Saves the collected results to +filename+ through Reporter.
    #
    # @param filename [String] destination path
    def save_results(filename)
      Reporter.new(@results).save_to_file(filename)
    end

    private

    # Generates the test data, yields, and always cleans the data up again,
    # including when generation or the yielded benchmarks raise.
    #
    # @return [BenchmarkSuite] self
    def with_test_data
      generate_test_data
      yield
      self
    ensure
      cleanup_test_data
    end

    # Test sizes used for data generation and compression benchmarks.
    def selected_sizes
      @quick ? [TEST_SIZES.first] : TEST_SIZES
    end

    # Data types used for data generation and compression benchmarks.
    def selected_types
      @quick ? [DATA_TYPES.first] : DATA_TYPES
    end

    # Name of the generated data file for a type/size combination.
    def data_filename(type, size)
      "#{type}_#{size}.dat"
    end

    # Path of the generated data file inside the test data directory.
    def data_filepath(type, size)
      File.join(@test_data.data_dir, data_filename(type, size))
    end

    def generate_test_data
      puts "\nGenerating test data..." if verbose

      selected_sizes.product(selected_types).each do |size, type|
        @test_data.public_send("generate_#{type}", size,
                               filename: data_filename(type, size))
      end
    end

    def run_compression_benchmarks
      puts "\nRunning compression benchmarks..." if verbose

      algos = @quick ? ["lzma"] : CompressionBench::ALGORITHMS

      # product keeps the algorithm -> size -> type iteration order.
      algos.product(selected_sizes, selected_types).each do |algorithm, size, type|
        @results << @compression_bench.benchmark_algorithm(
          algorithm, data_filepath(type, size), type
        )
      end
    end

    def run_filter_benchmarks
      puts "\nRunning filter benchmarks..." if verbose

      # Quick mode only generates the smallest text file, so the input used
      # below does not exist; filter benchmarks are skipped entirely.
      return if @quick

      sizes = [TEST_SIZES[1]]
      types = %w[source_code]

      FilterBench::FILTERS.product(sizes, types).each do |filter, size, type|
        @results << @filter_bench.benchmark_filter(
          filter, data_filepath(type, size), type
        )
      end
    end

    def cleanup_test_data
      puts "\nCleaning up test data..." if verbose
      @test_data.cleanup
    end
  end
end
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "benchmark"
|
|
4
|
+
require "tempfile"
|
|
5
|
+
require "fileutils"
|
|
6
|
+
require_relative "models/benchmark_result"
|
|
7
|
+
require_relative "models/comparison_result"
|
|
8
|
+
|
|
9
|
+
module Benchmark
  # Benchmarks compression algorithms against native 7-Zip.
  #
  # Each algorithm is run through Omnizip and, when a +7z+ or +7za+
  # executable is found on PATH, through 7-Zip as well. Every measurement is
  # the mean wall-clock time of ITERATIONS runs.
  class CompressionBench
    ALGORITHMS = %w[lzma lzma2 ppmd7 bzip2].freeze
    ITERATIONS = 3

    # 7-Zip executable names probed on PATH, in order of preference.
    SEVEN_ZIP_EXECUTABLES = %w[7z 7za].freeze

    # Benchmark algorithm name => value passed to 7-Zip's -m0= switch.
    SEVEN_ZIP_METHODS = {
      "lzma" => "LZMA",
      "lzma2" => "LZMA2",
      "ppmd7" => "PPMd",
      "bzip2" => "BZip2",
    }.freeze

    attr_reader :verbose

    # @param verbose [Boolean] print a line for every benchmark that is run
    def initialize(verbose: false)
      @verbose = verbose
      @seven_zip_available = check_seven_zip_availability
    end

    # @return [Boolean] whether a 7-Zip executable was found on PATH when
    #   this object was created
    def seven_zip_available?
      @seven_zip_available
    end

    # Benchmarks +algorithm+ on +input_file+ with Omnizip and 7-Zip.
    #
    # NOTE(review): the test name does not include the input size, so runs
    # of the same algorithm and data type on different sizes share one name.
    #
    # @param algorithm [String] one of ALGORITHMS
    # @param input_file [String] path of the file to compress
    # @param input_type [String] label describing the kind of data
    # @return [Models::ComparisonResult] Omnizip and 7-Zip results; the
    #   7-Zip result carries the error "7-Zip not available" when no
    #   executable was found
    def benchmark_algorithm(algorithm, input_file, input_type)
      puts "Benchmarking #{algorithm} on #{input_type}..." if verbose

      omnizip_result = benchmark_omnizip(algorithm, input_file, input_type)
      seven_zip_result =
        if seven_zip_available?
          benchmark_seven_zip(algorithm, input_file, input_type)
        else
          create_unavailable_result(algorithm, input_file, input_type)
        end

      Models::ComparisonResult.new(
        test_name: "#{algorithm}_#{input_type}",
        omnizip_result: omnizip_result,
        seven_zip_result: seven_zip_result,
      )
    end

    private

    def check_seven_zip_availability
      !get_7z_command.nil?
    end

    # Name of the first 7-Zip executable found on PATH, or nil when there is
    # none. The lookup happens once; a nil outcome is memoized as well.
    def get_7z_command
      return @get_7z_command if defined?(@get_7z_command)

      @get_7z_command = SEVEN_ZIP_EXECUTABLES.find do |name|
        executable_on_path?(name)
      end
    end

    # Whether +name+ resolves to an executable file in one of the PATH
    # directories. PATHEXT suffixes are tried when that variable is set.
    def executable_on_path?(name)
      extensions = ENV["PATHEXT"] ? ENV["PATHEXT"].split(";") : [""]
      directories = ENV.fetch("PATH", "").split(File::PATH_SEPARATOR)

      directories.reject(&:empty?).any? do |dir|
        extensions.any? do |ext|
          candidate = File.join(dir, "#{name}#{ext}")
          File.file?(candidate) && File.executable?(candidate)
        end
      end
    end

    def benchmark_omnizip(algorithm, input_file, input_type)
      # The library is loaded before timing starts so that its load time is
      # not counted as compression time of the first iteration.
      run_benchmark(algorithm, input_file, input_type,
                    prepare: method(:load_omnizip)) do |compressed_file|
        compress_with_omnizip(algorithm, input_file, compressed_file)
      end
    end

    def benchmark_seven_zip(algorithm, input_file, input_type)
      run_benchmark(algorithm, input_file, input_type) do |compressed_file|
        compress_with_7z(algorithm, input_file, compressed_file)
      end
    end

    # Shared benchmark driver: times the given block, which must write the
    # compressed output to the path it is yielded, and wraps the outcome in
    # a Models::BenchmarkResult. Any StandardError raised by +prepare+ or
    # the block is reported through the result's +error+ field. The
    # temporary output file is always removed afterwards.
    def run_benchmark(algorithm, input_file, input_type, prepare: nil)
      input_size = File.size(input_file)
      compressed_file = create_temp_file(".7z")

      begin
        prepare&.call

        # Output left behind by the previous iteration is removed (untimed)
        # so that every iteration creates the file from scratch.
        reset = -> { FileUtils.rm_f(compressed_file) }
        time = measure_time(setup: reset) { yield compressed_file }

        compressed_size = (File.size(compressed_file) if File.exist?(compressed_file))

        Models::BenchmarkResult.new(
          algorithm: algorithm,
          input_size: input_size,
          input_type: input_type,
          compressed_size: compressed_size,
          compression_time: time,
        )
      rescue StandardError => e
        Models::BenchmarkResult.new(
          algorithm: algorithm,
          input_size: input_size,
          input_type: input_type,
          error: e.message,
        )
      ensure
        FileUtils.rm_f(compressed_file)
      end
    end

    def create_unavailable_result(algorithm, input_file, input_type)
      Models::BenchmarkResult.new(
        algorithm: algorithm,
        input_size: File.size(input_file),
        input_type: input_type,
        error: "7-Zip not available",
      )
    end

    # Loads the Omnizip library from the repository unless its algorithms
    # namespace is already defined.
    def load_omnizip
      require_relative "../lib/omnizip" unless defined?(::Omnizip::Algorithms)
    end

    # Compresses +input_file+ in memory with the Omnizip implementation of
    # +algorithm+ and writes the result to +output_file+.
    #
    # @raise [RuntimeError] when +algorithm+ is not a known algorithm
    def compress_with_omnizip(algorithm, input_file, output_file)
      load_omnizip

      # Constants are referenced directly (not looked up by name) so they
      # are only resolved for the algorithm that was actually requested.
      algo_class = case algorithm
                   when "lzma" then Omnizip::Algorithms::LZMA
                   when "lzma2" then Omnizip::Algorithms::LZMA2
                   when "ppmd7" then Omnizip::Algorithms::PPMd7
                   when "bzip2" then Omnizip::Algorithms::BZip2
                   else raise "Unknown algorithm: #{algorithm}"
                   end

      File.binwrite(output_file, algo_class.compress(File.binread(input_file)))
    end

    # Adds +input_file+ to the archive +output_file+ with the 7-Zip CLI,
    # using the method mapped to +algorithm+ and the -mx=5 switch.
    #
    # Arguments are passed as an array, so no shell is involved and paths
    # containing spaces or shell metacharacters are handled verbatim.
    #
    # @raise [RuntimeError] on an unknown algorithm, a missing executable
    #   or a failed 7-Zip run
    def compress_with_7z(algorithm, input_file, output_file)
      method_flag = SEVEN_ZIP_METHODS.fetch(algorithm) do
        raise "Unknown algorithm: #{algorithm}"
      end
      command = get_7z_command
      raise "7-Zip not available" unless command

      success = system(command, "a", "-m0=#{method_flag}", "-mx=5",
                       output_file, input_file,
                       out: File::NULL, err: File::NULL)
      raise "7z compression failed" unless success
    end

    # Runs the block ITERATIONS times and returns the mean elapsed
    # wall-clock time in seconds, measured with the monotonic clock.
    #
    # @param setup [#call, nil] invoked before each run, outside the timing
    # @return [Float] mean seconds per run
    def measure_time(setup: nil)
      times = Array.new(ITERATIONS) do
        setup&.call
        started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        yield
        Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
      end
      times.sum / times.size
    end

    # Returns a unique temporary path ending in +extension+. The file itself
    # is removed again right away so that the path does not exist when it is
    # handed to the compressor.
    def create_temp_file(extension)
      temp = Tempfile.new(["benchmark", extension])
      temp.path.tap { temp.close! }
    end
  end
end
|