omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
= Compression Algorithms Guide
|
|
2
|
+
:toc:
|
|
3
|
+
:toclevels: 3
|
|
4
|
+
|
|
5
|
+
== Purpose
|
|
6
|
+
|
|
7
|
+
This document provides detailed information about all compression algorithms supported by Omnizip, including their characteristics, use cases, and performance considerations.
|
|
8
|
+
|
|
9
|
+
== Supported Algorithms Overview
|
|
10
|
+
|
|
11
|
+
[cols="20,15,15,50",options="header"]
|
|
12
|
+
|===
|
|
13
|
+
|Algorithm |ID |Type |Description
|
|
14
|
+
|
|
15
|
+
|LZMA |0x030101 |Dictionary |High compression, range coding
|
|
16
|
+
|LZMA2 |0x21 |Dictionary |Enhanced LZMA with better streaming
|
|
17
|
+
|PPMd7 |0x030401 |Statistical |Prediction by Partial Matching v7
|
|
18
|
+
|PPMd8 |0x030402 |Statistical |PPMd variant I v8
|
|
19
|
+
|BZip2 |0x040202 |BWT |Burrows-Wheeler Transform
|
|
20
|
+
|Deflate |0x040108 |LZ77 |ZIP-compatible (Zlib wrapper)
|
|
21
|
+
|Deflate64 |9 (ZIP) |LZ77 |Enhanced Deflate with 64KB window
|
|
22
|
+
|Zstandard |0x04F71101 |LZ77 |Fast modern compression
|
|
23
|
+
|Copy |0x00 |None |Uncompressed storage
|
|
24
|
+
|===
|
|
25
|
+
|
|
26
|
+
== LZMA/LZMA2
|
|
27
|
+
|
|
28
|
+
=== General
|
|
29
|
+
|
|
30
|
+
LZMA (Lempel-Ziv-Markov chain Algorithm) is a lossless data compression algorithm that achieves high compression ratios through dictionary-based compression combined with range coding. The implementation uses adaptive probability models that adjust based on the input data.
|
|
31
|
+
|
|
32
|
+
The LZMA algorithm operates in three main stages:
|
|
33
|
+
|
|
34
|
+
. Dictionary-based compression using LZ77 match finding
|
|
35
|
+
. Range encoding with adaptive bit models
|
|
36
|
+
. State machine for context tracking
|
|
37
|
+
|
|
38
|
+
LZMA2 extends LZMA with:
|
|
39
|
+
|
|
40
|
+
* Better support for incompressible data
|
|
41
|
+
* Improved parallelization potential
|
|
42
|
+
* More efficient handling of small chunks
|
|
43
|
+
* Reset capability for streaming
|
|
44
|
+
|
|
45
|
+
=== Characteristics
|
|
46
|
+
|
|
47
|
+
**Compression Ratio:** ⭐⭐⭐⭐⭐ (Excellent)
|
|
48
|
+
|
|
49
|
+
**Speed:** ⭐⭐ (Slow)
|
|
50
|
+
|
|
51
|
+
**Memory Usage:** ⭐⭐⭐ (Moderate to High)
|
|
52
|
+
|
|
53
|
+
**Best For:**
|
|
54
|
+
|
|
55
|
+
* Archival storage where size matters most
|
|
56
|
+
* Software distribution packages
|
|
57
|
+
* Documents and text files
|
|
58
|
+
* Any data where maximum compression is the priority
|
|
59
|
+
|
|
60
|
+
**Not Ideal For:**
|
|
61
|
+
|
|
62
|
+
* Real-time compression needs
|
|
63
|
+
* Streaming data that changes frequently
|
|
64
|
+
* Already compressed data (images, videos)
|
|
65
|
+
|
|
66
|
+
=== Compression Levels
|
|
67
|
+
|
|
68
|
+
LZMA/LZMA2 support compression levels from 1 to 9:
|
|
69
|
+
|
|
70
|
+
* **Level 1:** Fastest compression, 64KB dictionary, ~3-5x compression
|
|
71
|
+
* **Level 2-3:** Fast compression, 1MB dictionary, ~4-7x compression
|
|
72
|
+
* **Level 4-5:** Balanced (default 5), 4MB dictionary, ~6-10x compression
|
|
73
|
+
* **Level 6-7:** Higher compression, 8MB dictionary, ~8-12x compression
|
|
74
|
+
* **Level 8-9:** Maximum compression, 16MB dictionary, ~10-15x compression
|
|
75
|
+
|
|
76
|
+
=== Usage Example
|
|
77
|
+
|
|
78
|
+
[source,ruby]
|
|
79
|
+
----
|
|
80
|
+
# LZMA compression
|
|
81
|
+
lzma = Omnizip::AlgorithmRegistry.get(:lzma).new(level: 9)
|
|
82
|
+
lzma.compress(input, output)
|
|
83
|
+
|
|
84
|
+
# LZMA2 compression (recommended for new archives)
|
|
85
|
+
lzma2 = Omnizip::AlgorithmRegistry.get(:lzma2).new(level: 7)
|
|
86
|
+
lzma2.compress(input, output)
|
|
87
|
+
----
|
|
88
|
+
|
|
89
|
+
== BZip2
|
|
90
|
+
|
|
91
|
+
=== General
|
|
92
|
+
|
|
93
|
+
BZip2 uses the Burrows-Wheeler Transform (BWT) for block-sorting compression, achieving good compression ratios with moderate speed. It's particularly effective for text data.
|
|
94
|
+
|
|
95
|
+
The algorithm operates in stages:
|
|
96
|
+
|
|
97
|
+
. Run-Length Encoding (RLE) preprocessing
|
|
98
|
+
. Burrows-Wheeler Transform
|
|
99
|
+
. Move-to-Front transform
|
|
100
|
+
. Run-Length Encoding (second pass)
|
|
101
|
+
. Huffman coding
|
|
102
|
+
|
|
103
|
+
=== Characteristics
|
|
104
|
+
|
|
105
|
+
**Compression Ratio:** ⭐⭐⭐⭐ (Very Good)
|
|
106
|
+
|
|
107
|
+
**Speed:** ⭐⭐⭐ (Moderate)
|
|
108
|
+
|
|
109
|
+
**Memory Usage:** ⭐⭐⭐⭐ (Low to Moderate)
|
|
110
|
+
|
|
111
|
+
**Best For:**
|
|
112
|
+
|
|
113
|
+
* Text files and source code
|
|
114
|
+
* Log files
|
|
115
|
+
* Data with repetitive patterns
|
|
116
|
+
* When LZMA is too slow but good compression is needed
|
|
117
|
+
|
|
118
|
+
**Not Ideal For:**
|
|
119
|
+
|
|
120
|
+
* Binary data with low redundancy
|
|
121
|
+
* Multimedia files
|
|
122
|
+
* When maximum compression is required
|
|
123
|
+
|
|
124
|
+
=== Compression Levels
|
|
125
|
+
|
|
126
|
+
BZip2 supports levels 1-9:
|
|
127
|
+
|
|
128
|
+
* **Level 1:** 100KB blocks, fastest
|
|
129
|
+
* **Level 5:** 500KB blocks, default
|
|
130
|
+
* **Level 9:** 900KB blocks, best compression
|
|
131
|
+
|
|
132
|
+
Block size directly affects memory usage and compression ratio.
|
|
133
|
+
|
|
134
|
+
=== Usage Example
|
|
135
|
+
|
|
136
|
+
[source,ruby]
|
|
137
|
+
----
|
|
138
|
+
bzip2 = Omnizip::AlgorithmRegistry.get(:bzip2).new(level: 9)
|
|
139
|
+
bzip2.compress(input, output)
|
|
140
|
+
----
|
|
141
|
+
|
|
142
|
+
== PPMd7 and PPMd8
|
|
143
|
+
|
|
144
|
+
=== General
|
|
145
|
+
|
|
146
|
+
PPMd (Prediction by Partial Matching) excels at text compression using statistical modeling. It maintains context models that predict the probability of each symbol based on preceding symbols.
|
|
147
|
+
|
|
148
|
+
Two variants are supported:
|
|
149
|
+
|
|
150
|
+
* **PPMd7:** Original variant, excellent for general text
|
|
151
|
+
* **PPMd8:** Variant I with improved handling of binary data
|
|
152
|
+
|
|
153
|
+
=== Characteristics
|
|
154
|
+
|
|
155
|
+
**Compression Ratio:** ⭐⭐⭐⭐⭐ (Excellent for text)
|
|
156
|
+
|
|
157
|
+
**Speed:** ⭐⭐ (Slow)
|
|
158
|
+
|
|
159
|
+
**Memory Usage:** ⭐⭐ (High, configurable)
|
|
160
|
+
|
|
161
|
+
**Best For:**
|
|
162
|
+
|
|
163
|
+
* Plain text files
|
|
164
|
+
* Source code
|
|
165
|
+
* XML/JSON/YAML files
|
|
166
|
+
* Natural language text
|
|
167
|
+
* Any highly structured text data
|
|
168
|
+
|
|
169
|
+
**Not Ideal For:**
|
|
170
|
+
|
|
171
|
+
* Binary executable files
|
|
172
|
+
* Multimedia files
|
|
173
|
+
* Low-redundancy data
|
|
174
|
+
* Limited memory environments
|
|
175
|
+
|
|
176
|
+
=== Configuration
|
|
177
|
+
|
|
178
|
+
PPMd requires two key parameters:
|
|
179
|
+
|
|
180
|
+
* `mem_size`: Memory size, given as the exponent of a power of 2 (e.g., 24 = 16MB)
|
|
181
|
+
* `order`: Context order (typically 4-8)
|
|
182
|
+
|
|
183
|
+
Higher memory and order = better compression but slower.
|
|
184
|
+
|
|
185
|
+
=== Usage Example
|
|
186
|
+
|
|
187
|
+
[source,ruby]
|
|
188
|
+
----
|
|
189
|
+
# PPMd7 for general text
|
|
190
|
+
ppmd7 = Omnizip::AlgorithmRegistry.get(:ppmd7).new(
|
|
191
|
+
mem_size: 24, # 16MB
|
|
192
|
+
order: 6
|
|
193
|
+
)
|
|
194
|
+
ppmd7.compress(input, output)
|
|
195
|
+
|
|
196
|
+
# PPMd8 for mixed content
|
|
197
|
+
ppmd8 = Omnizip::AlgorithmRegistry.get(:ppmd8).new(
|
|
198
|
+
mem_size: 26, # 64MB
|
|
199
|
+
order: 8
|
|
200
|
+
)
|
|
201
|
+
ppmd8.compress(input, output)
|
|
202
|
+
----
|
|
203
|
+
|
|
204
|
+
== Deflate
|
|
205
|
+
|
|
206
|
+
=== General
|
|
207
|
+
|
|
208
|
+
Deflate provides ZIP-compatible compression using an LZ77 sliding window with Huffman coding. It's the standard compression algorithm for ZIP files and uses the native Zlib library.
|
|
209
|
+
|
|
210
|
+
=== Characteristics
|
|
211
|
+
|
|
212
|
+
**Compression Ratio:** ⭐⭐⭐ (Good)
|
|
213
|
+
|
|
214
|
+
**Speed:** ⭐⭐⭐⭐ (Fast)
|
|
215
|
+
|
|
216
|
+
**Memory Usage:** ⭐⭐⭐⭐⭐ (Low)
|
|
217
|
+
|
|
218
|
+
**Best For:**
|
|
219
|
+
|
|
220
|
+
* ZIP file creation
|
|
221
|
+
* Fast compression needs
|
|
222
|
+
* Web content (gzip)
|
|
223
|
+
* Limited resource environments
|
|
224
|
+
* When compatibility is important
|
|
225
|
+
|
|
226
|
+
**Not Ideal For:**
|
|
227
|
+
|
|
228
|
+
* Maximum compression requirements
|
|
229
|
+
* When speed is not critical
|
|
230
|
+
|
|
231
|
+
=== Compression Levels
|
|
232
|
+
|
|
233
|
+
Deflate supports levels 1-9:
|
|
234
|
+
|
|
235
|
+
* **Level 1:** Fastest, minimal compression
|
|
236
|
+
* **Level 6:** Default, balanced
|
|
237
|
+
* **Level 9:** Best compression, slower
|
|
238
|
+
|
|
239
|
+
=== Usage Example
|
|
240
|
+
|
|
241
|
+
[source,ruby]
|
|
242
|
+
----
|
|
243
|
+
deflate = Omnizip::AlgorithmRegistry.get(:deflate).new(level: 6)
|
|
244
|
+
deflate.compress(input, output)
|
|
245
|
+
----
|
|
246
|
+
|
|
247
|
+
== Deflate64
|
|
248
|
+
|
|
249
|
+
=== General
|
|
250
|
+
|
|
251
|
+
Deflate64 (Enhanced Deflate) extends standard Deflate with a 64KB sliding window (versus 32KB in standard Deflate), providing better compression ratios for larger files while maintaining ZIP format compatibility as compression method 9.
|
|
252
|
+
|
|
253
|
+
The algorithm operates in stages:
|
|
254
|
+
|
|
255
|
+
. LZ77 match finding with 64KB dictionary
|
|
256
|
+
. Huffman coding with dynamic trees
|
|
257
|
+
. Bitstream encoding
|
|
258
|
+
|
|
259
|
+
=== Characteristics
|
|
260
|
+
|
|
261
|
+
**Compression Ratio:** ⭐⭐⭐⭐ (Better than Deflate)
|
|
262
|
+
|
|
263
|
+
**Speed:** ⭐⭐⭐⭐ (Fast)
|
|
264
|
+
|
|
265
|
+
**Memory Usage:** ⭐⭐⭐⭐ (Low to Moderate)
|
|
266
|
+
|
|
267
|
+
**Best For:**
|
|
268
|
+
|
|
269
|
+
* Large files (> 32KB)
|
|
270
|
+
* ZIP archives needing better compression
|
|
271
|
+
* When Deflate is not enough but LZMA is too slow
|
|
272
|
+
* Files with long-range repetition
|
|
273
|
+
|
|
274
|
+
**Not Ideal For:**
|
|
275
|
+
|
|
276
|
+
* Small files (< 32KB)
|
|
277
|
+
* When maximum compatibility is needed (not all ZIP readers support it)
|
|
278
|
+
|
|
279
|
+
=== Usage Example
|
|
280
|
+
|
|
281
|
+
[source,ruby]
|
|
282
|
+
----
|
|
283
|
+
deflate64 = Omnizip::AlgorithmRegistry.get(:deflate64).new(level: 7)
|
|
284
|
+
deflate64.compress(input, output)
|
|
285
|
+
----
|
|
286
|
+
|
|
287
|
+
== Zstandard
|
|
288
|
+
|
|
289
|
+
=== General
|
|
290
|
+
|
|
291
|
+
Zstandard (zstd) offers fast compression with good ratios, using a modern LZ77-based algorithm. It's designed to provide a good balance between compression ratio and speed.
|
|
292
|
+
|
|
293
|
+
**Note:** Current implementation uses the zstd-ruby gem. A pure Ruby implementation is planned for full portability.
|
|
294
|
+
|
|
295
|
+
=== Characteristics
|
|
296
|
+
|
|
297
|
+
**Compression Ratio:** ⭐⭐⭐⭐ (Very Good)
|
|
298
|
+
|
|
299
|
+
**Speed:** ⭐⭐⭐⭐⭐ (Very Fast)
|
|
300
|
+
|
|
301
|
+
**Memory Usage:** ⭐⭐⭐⭐ (Low to Moderate)
|
|
302
|
+
|
|
303
|
+
**Best For:**
|
|
304
|
+
|
|
305
|
+
* Real-time compression
|
|
306
|
+
* Network transmission
|
|
307
|
+
* Fast backup operations
|
|
308
|
+
* When both speed and compression matter
|
|
309
|
+
|
|
310
|
+
**Not Ideal For:**
|
|
311
|
+
|
|
312
|
+
* Maximum compression needs (use LZMA instead)
|
|
313
|
+
* Environments requiring pure Ruby (until pure Ruby implementation is complete)
|
|
314
|
+
|
|
315
|
+
=== Compression Levels
|
|
316
|
+
|
|
317
|
+
Zstandard supports levels 1-22:
|
|
318
|
+
|
|
319
|
+
* **Level 1-3:** Fast compression, ~2-3x ratio
|
|
320
|
+
* **Level 3:** Default, good balance
|
|
321
|
+
* **Level 10-15:** High compression
|
|
322
|
+
* **Level 16-22:** Maximum compression (very slow)
|
|
323
|
+
|
|
324
|
+
=== Usage Example
|
|
325
|
+
|
|
326
|
+
[source,ruby]
|
|
327
|
+
----
|
|
328
|
+
zstd = Omnizip::AlgorithmRegistry.get(:zstd).new(level: 3)
|
|
329
|
+
zstd.compress(input, output)
|
|
330
|
+
----
|
|
331
|
+
|
|
332
|
+
== Algorithm Selection Guide
|
|
333
|
+
|
|
334
|
+
=== By Use Case
|
|
335
|
+
|
|
336
|
+
**Maximum Compression (size is priority):**
|
|
337
|
+
|
|
338
|
+
1. LZMA/LZMA2 (level 9)
|
|
339
|
+
2. PPMd7/PPMd8 (for text)
|
|
340
|
+
3. BZip2 (level 9)
|
|
341
|
+
|
|
342
|
+
**Balanced Compression (size + speed):**
|
|
343
|
+
|
|
344
|
+
1. LZMA2 (level 5-6)
|
|
345
|
+
2. BZip2 (level 5-6)
|
|
346
|
+
3. Deflate64 (level 6-7)
|
|
347
|
+
|
|
348
|
+
**Fast Compression (speed is priority):**
|
|
349
|
+
|
|
350
|
+
1. Zstandard (level 1-3)
|
|
351
|
+
2. Deflate (level 1-3)
|
|
352
|
+
3. LZMA2 (level 1-2)
|
|
353
|
+
|
|
354
|
+
**Text Files:**
|
|
355
|
+
|
|
356
|
+
1. PPMd7 (best ratio)
|
|
357
|
+
2. LZMA2 (good all-around)
|
|
358
|
+
3. BZip2 (fast and good)
|
|
359
|
+
|
|
360
|
+
**Binary Executable Files:**
|
|
361
|
+
|
|
362
|
+
1. LZMA2 + BCJ filter
|
|
363
|
+
2. Deflate64 + BCJ filter
|
|
364
|
+
3. BZip2
|
|
365
|
+
|
|
366
|
+
**Mixed Content Archives:**
|
|
367
|
+
|
|
368
|
+
1. LZMA2 (default for .7z)
|
|
369
|
+
2. Deflate (default for .zip)
|
|
370
|
+
3. BZip2
|
|
371
|
+
|
|
372
|
+
=== By File Type
|
|
373
|
+
|
|
374
|
+
[cols="30,70",options="header"]
|
|
375
|
+
|===
|
|
376
|
+
|File Type |Recommended Algorithm
|
|
377
|
+
|
|
378
|
+
|Text files (.txt, .log, .csv)
|
|
379
|
+
|PPMd7, LZMA2, BZip2
|
|
380
|
+
|
|
381
|
+
|Source code (.c, .java, .py)
|
|
382
|
+
|PPMd7, LZMA2
|
|
383
|
+
|
|
384
|
+
|Documents (.doc, .pdf, .odt)
|
|
385
|
+
|LZMA2, Deflate64
|
|
386
|
+
|
|
387
|
+
|Executables (.exe, .dll, .so)
|
|
388
|
+
|LZMA2 + BCJ filter
|
|
389
|
+
|
|
390
|
+
|Archives (.tar, .cpio)
|
|
391
|
+
|LZMA2, BZip2
|
|
392
|
+
|
|
393
|
+
|Database dumps (.sql, .db)
|
|
394
|
+
|PPMd7, LZMA2
|
|
395
|
+
|
|
396
|
+
|Configuration files (.xml, .json, .yaml)
|
|
397
|
+
|PPMd7, BZip2
|
|
398
|
+
|
|
399
|
+
|Mixed content
|
|
400
|
+
|LZMA2 (versatile)
|
|
401
|
+
|
|
402
|
+
|Already compressed (.jpg, .mp3, .mp4)
|
|
403
|
+
|Copy (no compression)
|
|
404
|
+
|===
|
|
405
|
+
|
|
406
|
+
== Performance Considerations
|
|
407
|
+
|
|
408
|
+
=== Pure Ruby Implementation
|
|
409
|
+
|
|
410
|
+
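Most algorithms are implemented in pure Ruby for maximum portability (the exceptions are Deflate, which uses the native Zlib library, and Zstandard, which currently uses the zstd-ruby gem). For the pure Ruby implementations, this means: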
|
|
411
|
+
|
|
412
|
+
* **10-60x slower** than native C implementations
|
|
413
|
+
* No external dependencies required
|
|
414
|
+
* Works on all Ruby platforms (MRI, JRuby, TruffleRuby)
|
|
415
|
+
* Acceptable for most non-real-time use cases
|
|
416
|
+
|
|
417
|
+
=== Performance Baseline (v1.0)
|
|
418
|
+
|
|
419
|
+
Relative to native implementations:
|
|
420
|
+
|
|
421
|
+
* **LZMA encode:** 13-15x slower (acceptable)
|
|
422
|
+
* **LZMA decode:** 8-10x slower (good)
|
|
423
|
+
* **Range coder:** 10x slower (excellent)
|
|
424
|
+
* **BWT (BZip2):** 50-60x slower (needs optimization)
|
|
425
|
+
|
|
426
|
+
=== Memory Usage Guidelines
|
|
427
|
+
|
|
428
|
+
* **LZMA level 9:** ~16MB dictionary + overhead
|
|
429
|
+
* **PPMd order 8:** ~64MB+ (configurable)
|
|
430
|
+
* **BZip2 level 9:** 900KB blocks + overhead
|
|
431
|
+
* **Deflate/Deflate64:** ~1-2MB
|
|
432
|
+
* **Zstandard level 3:** ~2-4MB
|
|
433
|
+
|
|
434
|
+
For low-memory environments, prefer Deflate or lower compression levels.
|
|
435
|
+
|
|
436
|
+
== See Also
|
|
437
|
+
|
|
438
|
+
* link:api-usage.adoc[Library API Usage]
|
|
439
|
+
* link:cli-usage.adoc[CLI Usage Guide]
|
|
440
|
+
* link:preprocessing-filters.adoc[Preprocessing Filters]
|
|
441
|
+
* link:compression-profiles.adoc[Compression Profiles]
|
|
442
|
+
* link:../README.adoc[Main README]
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
= Compression Profiles
|
|
2
|
+
:toc:
|
|
3
|
+
:toclevels: 3
|
|
4
|
+
|
|
5
|
+
== Purpose
|
|
6
|
+
|
|
7
|
+
Compression profiles provide intelligent compression strategy selection based on file type, automatically choosing optimal algorithms and settings for different content types.
|
|
8
|
+
|
|
9
|
+
== Available Profiles
|
|
10
|
+
|
|
11
|
+
=== Built-in Profiles
|
|
12
|
+
|
|
13
|
+
[cols="20,20,60",options="header"]
|
|
14
|
+
|===
|
|
15
|
+
|Profile |Algorithm |Best For
|
|
16
|
+
|
|
17
|
+
|**Fast**
|
|
18
|
+
|Deflate (level 1)
|
|
19
|
+
|Quick compression, minimal CPU usage
|
|
20
|
+
|
|
21
|
+
|**Balanced**
|
|
22
|
+
|Deflate (level 6)
|
|
23
|
+
|General-purpose compression (default)
|
|
24
|
+
|
|
25
|
+
|**Maximum**
|
|
26
|
+
|LZMA2 (level 9)
|
|
27
|
+
|Maximum compression ratio, larger files
|
|
28
|
+
|
|
29
|
+
|**Text**
|
|
30
|
+
|PPMd7
|
|
31
|
+
|Text files, source code, documents
|
|
32
|
+
|
|
33
|
+
|**Binary**
|
|
34
|
+
|LZMA2 + BCJ
|
|
35
|
+
|Executables, compiled binaries
|
|
36
|
+
|
|
37
|
+
|**Archive**
|
|
38
|
+
|Store (no compression)
|
|
39
|
+
|Pre-compressed archives (zip, 7z, etc.)
|
|
40
|
+
|===
|
|
41
|
+
|
|
42
|
+
== Using Profiles
|
|
43
|
+
|
|
44
|
+
=== Explicit Profile Selection
|
|
45
|
+
|
|
46
|
+
[source,ruby]
|
|
47
|
+
----
|
|
48
|
+
# Get a profile by name
|
|
49
|
+
profile = Omnizip::Profile.get(:maximum)
|
|
50
|
+
|
|
51
|
+
# Use profile for compression
|
|
52
|
+
Omnizip::Formats::SevenZip::Writer.new('archive.7z', profile: profile) do |zip|
|
|
53
|
+
zip.add_file('large_file.dat')
|
|
54
|
+
end
|
|
55
|
+
----
|
|
56
|
+
|
|
57
|
+
=== Auto-detection Based on File Type
|
|
58
|
+
|
|
59
|
+
[source,ruby]
|
|
60
|
+
----
|
|
61
|
+
# Detect optimal profile for a file
|
|
62
|
+
profile = Omnizip::Profile.detect('document.txt')
|
|
63
|
+
puts "Selected: #{profile.name}" # => :text
|
|
64
|
+
|
|
65
|
+
# Apply detected profile
|
|
66
|
+
Omnizip::Formats::SevenZip::Writer.new('archive.7z', profile: profile) do |zip|
|
|
67
|
+
zip.add_file('document.txt')
|
|
68
|
+
end
|
|
69
|
+
----
|
|
70
|
+
|
|
71
|
+
=== Profile-based Compression
|
|
72
|
+
|
|
73
|
+
[source,ruby]
|
|
74
|
+
----
|
|
75
|
+
# Compress with auto-detected profile
|
|
76
|
+
files = ['app.exe', 'readme.txt', 'data.zip']
|
|
77
|
+
|
|
78
|
+
Omnizip::Formats::SevenZip::Writer.new('backup.7z') do |zip|
|
|
79
|
+
files.each do |file|
|
|
80
|
+
# Auto-detect and apply optimal profile for each file
|
|
81
|
+
profile = Omnizip::Profile.detect(file)
|
|
82
|
+
zip.add_file(file, profile: profile)
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
----
|
|
86
|
+
|
|
87
|
+
== Custom Profiles
|
|
88
|
+
|
|
89
|
+
=== Creating Custom Profiles
|
|
90
|
+
|
|
91
|
+
[source,ruby]
|
|
92
|
+
----
|
|
93
|
+
# Define a custom profile
|
|
94
|
+
Omnizip::Profile.define(:my_profile) do |p|
|
|
95
|
+
p.algorithm = :lzma2
|
|
96
|
+
p.level = 7
|
|
97
|
+
p.filter = :bcj_x86
|
|
98
|
+
p.solid = true
|
|
99
|
+
p.description = "Custom profile for my use case"
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# Use custom profile
|
|
103
|
+
profile = Omnizip::Profile.get(:my_profile)
|
|
104
|
+
----
|
|
105
|
+
|
|
106
|
+
=== Extending Existing Profiles
|
|
107
|
+
|
|
108
|
+
[source,ruby]
|
|
109
|
+
----
|
|
110
|
+
# Extend fast profile with slightly better compression
|
|
111
|
+
Omnizip::Profile.define(:my_fast, base: :fast) do |p|
|
|
112
|
+
p.level = 2
|
|
113
|
+
p.description = "Slightly better than fast"
|
|
114
|
+
end
|
|
115
|
+
----
|
|
116
|
+
|
|
117
|
+
== Profile Auto-detection
|
|
118
|
+
|
|
119
|
+
=== By MIME Type
|
|
120
|
+
|
|
121
|
+
The profile system automatically selects profiles based on file MIME types:
|
|
122
|
+
|
|
123
|
+
[source,ruby]
|
|
124
|
+
----
|
|
125
|
+
# Text files → Text profile (PPMd7)
|
|
126
|
+
Omnizip::Profile.for_file_type('text/plain')
|
|
127
|
+
|
|
128
|
+
# Executables → Binary profile (LZMA2 + BCJ)
|
|
129
|
+
Omnizip::Profile.for_file_type('application/x-executable')
|
|
130
|
+
|
|
131
|
+
# Archives → Archive profile (Store)
|
|
132
|
+
Omnizip::Profile.for_file_type('application/zip')
|
|
133
|
+
----
|
|
134
|
+
|
|
135
|
+
=== By File Category
|
|
136
|
+
|
|
137
|
+
[source,ruby]
|
|
138
|
+
----
|
|
139
|
+
# Select by category
|
|
140
|
+
Omnizip::Profile.for_file_type(:text) # → text profile
|
|
141
|
+
Omnizip::Profile.for_file_type(:executable) # → binary profile
|
|
142
|
+
Omnizip::Profile.for_file_type(:archive) # → archive profile
|
|
143
|
+
----
|
|
144
|
+
|
|
145
|
+
== Profile Configuration
|
|
146
|
+
|
|
147
|
+
=== Profile Attributes
|
|
148
|
+
|
|
149
|
+
Each profile has the following configurable attributes:
|
|
150
|
+
|
|
151
|
+
[source,ruby]
|
|
152
|
+
----
|
|
153
|
+
profile = Omnizip::Profile.get(:maximum)
|
|
154
|
+
|
|
155
|
+
profile.name # => :maximum
|
|
156
|
+
profile.algorithm # => :lzma2
|
|
157
|
+
profile.level # => 9
|
|
158
|
+
profile.filter # => nil
|
|
159
|
+
profile.solid # => true
|
|
160
|
+
profile.description # => "Maximum compression..."
|
|
161
|
+
----
|
|
162
|
+
|
|
163
|
+
=== Listing Available Profiles
|
|
164
|
+
|
|
165
|
+
[source,ruby]
|
|
166
|
+
----
|
|
167
|
+
# List all profile names
|
|
168
|
+
Omnizip::Profile.list
|
|
169
|
+
# => [:fast, :balanced, :maximum, :text, :binary, :archive]
|
|
170
|
+
|
|
171
|
+
# Get profile details
|
|
172
|
+
Omnizip::Profile.list.each do |name|
|
|
173
|
+
profile = Omnizip::Profile.get(name)
|
|
174
|
+
puts "#{name}: #{profile.description}"
|
|
175
|
+
end
|
|
176
|
+
----
|
|
177
|
+
|
|
178
|
+
== Examples
|
|
179
|
+
|
|
180
|
+
=== Example 1: Backup with Optimal Compression
|
|
181
|
+
|
|
182
|
+
[source,ruby]
|
|
183
|
+
----
|
|
184
|
+
def backup_with_profiles(source_dir, archive_path)
|
|
185
|
+
Omnizip::Formats::SevenZip::Writer.new(archive_path) do |zip|
|
|
186
|
+
Dir.glob("#{source_dir}/**/*").each do |file|
|
|
187
|
+
next if File.directory?(file)
|
|
188
|
+
|
|
189
|
+
# Auto-detect and use optimal profile
|
|
190
|
+
profile = Omnizip::Profile.detect(file)
|
|
191
|
+
relative_path = file.sub("#{source_dir}/", '')
|
|
192
|
+
|
|
193
|
+
puts "Adding #{relative_path} with #{profile.name} profile"
|
|
194
|
+
zip.add_file(file, archive_path: relative_path, profile: profile)
|
|
195
|
+
end
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
backup_with_profiles('my_project', 'backup.7z')
|
|
200
|
+
----
|
|
201
|
+
|
|
202
|
+
=== Example 2: Custom Profile for Log Files
|
|
203
|
+
|
|
204
|
+
[source,ruby]
|
|
205
|
+
----
|
|
206
|
+
# Create specialized profile for log files
|
|
207
|
+
Omnizip::Profile.define(:logs, base: :text) do |p|
|
|
208
|
+
p.level = 9 # Maximum compression for logs
|
|
209
|
+
p.solid = true # Solid compression for similar files
|
|
210
|
+
p.description = "Optimized for log files"
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
# Use for log archival
|
|
214
|
+
Dir.glob('logs/*.log').each do |log|
|
|
215
|
+
Omnizip::Formats::SevenZip::Writer.new("#{log}.7z",
|
|
216
|
+
profile: :logs) do |zip|
|
|
217
|
+
zip.add_file(log)
|
|
218
|
+
end
|
|
219
|
+
end
|
|
220
|
+
----
|
|
221
|
+
|
|
222
|
+
=== Example 3: Mixed Content with Auto-detection
|
|
223
|
+
|
|
224
|
+
[source,ruby]
|
|
225
|
+
----
|
|
226
|
+
# Archive with mixed file types
|
|
227
|
+
files = {
|
|
228
|
+
'report.pdf' => :balanced, # Documents
|
|
229
|
+
'app.exe' => :binary, # Executables
|
|
230
|
+
'source.cpp' => :text, # Source code
|
|
231
|
+
'backup.zip' => :archive # Pre-compressed
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
Omnizip::Formats::SevenZip::Writer.new('mixed.7z') do |zip|
|
|
235
|
+
files.each do |file, expected_profile|
|
|
236
|
+
profile = Omnizip::Profile.detect(file)
|
|
237
|
+
puts "#{file}: detected #{profile.name}, expected #{expected_profile}"
|
|
238
|
+
zip.add_file(file, profile: profile)
|
|
239
|
+
end
|
|
240
|
+
end
|
|
241
|
+
----
|
|
242
|
+
|
|
243
|
+
== See Also
|
|
244
|
+
|
|
245
|
+
* link:../README.adoc#compression-levels[Compression Levels]
|
|
246
|
+
* link:advanced-features.adoc[Advanced Features]
|
|
247
|
+
* link:performance-profiler.adoc[Performance Profiler]
|