omnizip 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.rubocop_todo.yml +754 -0
- data/COPYING +502 -0
- data/Gemfile +17 -0
- data/LICENSE +12 -0
- data/README.adoc +1045 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +260 -0
- data/benchmark/benchmark_suite.rb +125 -0
- data/benchmark/compression_bench.rb +181 -0
- data/benchmark/filter_bench.rb +180 -0
- data/benchmark/models/benchmark_result.rb +59 -0
- data/benchmark/models/comparison_result.rb +69 -0
- data/benchmark/profile_suite.rb +167 -0
- data/benchmark/reporter.rb +150 -0
- data/benchmark/run_benchmarks.rb +66 -0
- data/benchmark/test_data.rb +137 -0
- data/config/formats/rar3_spec.yml +91 -0
- data/config/formats/rar5_spec.yml +102 -0
- data/docs/.github/workflows/docs.yml +142 -0
- data/docs/.gitignore +21 -0
- data/docs/.lychee.toml +67 -0
- data/docs/Gemfile +13 -0
- data/docs/RAR_WRITE_SUPPORT.md +26 -0
- data/docs/README.md +101 -0
- data/docs/_config.yml +112 -0
- data/docs/assets/logo.svg +1 -0
- data/docs/assets/omnizip-logo.pdf +1540 -11
- data/docs/comparison/feature-matrix.adoc +694 -0
- data/docs/comparison/index.adoc +113 -0
- data/docs/comparison/vs-7zip.adoc +309 -0
- data/docs/comparison/vs-peazip.adoc +77 -0
- data/docs/comparison/vs-rubyzip.adoc +342 -0
- data/docs/comparison/vs-winrar.adoc +100 -0
- data/docs/compatibility.adoc +579 -0
- data/docs/concepts/index.adoc +129 -0
- data/docs/developer/architecture.adoc +256 -0
- data/docs/developer/contributing.adoc +158 -0
- data/docs/developer/index.adoc +25 -0
- data/docs/developer/testing.adoc +212 -0
- data/docs/getting-started/basic-usage.adoc +271 -0
- data/docs/getting-started/index.adoc +42 -0
- data/docs/getting-started/installation.adoc +138 -0
- data/docs/getting-started/quick-start.adoc +185 -0
- data/docs/getting-started/your-first-archive.adoc +218 -0
- data/docs/guides/advanced-features/encryption.adoc +300 -0
- data/docs/guides/advanced-features/index.adoc +49 -0
- data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
- data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
- data/docs/guides/advanced-features/streaming.adoc +212 -0
- data/docs/guides/archive-formats/gzip-format.adoc +107 -0
- data/docs/guides/archive-formats/index.adoc +130 -0
- data/docs/guides/archive-formats/rar-format.adoc +104 -0
- data/docs/guides/archive-formats/rar5.adoc +521 -0
- data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
- data/docs/guides/archive-formats/tar-format.adoc +106 -0
- data/docs/guides/archive-formats/xz-format.adoc +118 -0
- data/docs/guides/archive-formats/zip-format.adoc +35 -0
- data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
- data/docs/guides/compression-algorithms/deflate.adoc +319 -0
- data/docs/guides/compression-algorithms/index.adoc +190 -0
- data/docs/guides/compression-algorithms/lzma.adoc +398 -0
- data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
- data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
- data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
- data/docs/guides/creating-archives.adoc +354 -0
- data/docs/guides/extracting-archives.adoc +53 -0
- data/docs/guides/format-conversion.adoc +64 -0
- data/docs/guides/index.adoc +49 -0
- data/docs/guides/migration-rubyzip.adoc +217 -0
- data/docs/guides/parity-archives.adoc +605 -0
- data/docs/guides/performance-tuning.adoc +88 -0
- data/docs/index.adoc +218 -0
- data/docs/lychee.toml +67 -0
- data/docs/reference/api/overview.adoc +188 -0
- data/docs/reference/cli/compress-command.adoc +114 -0
- data/docs/reference/cli/overview.adoc +140 -0
- data/docs/reference/index.adoc +26 -0
- data/docs/resources/faq.adoc +185 -0
- data/docs/resources/quick-reference.adoc +222 -0
- data/docs/troubleshooting/index.adoc +208 -0
- data/examples/api_comparison.rb +205 -0
- data/examples/deflate64_example.rb +96 -0
- data/examples/par2_demo.rb +121 -0
- data/examples/quick_start_native.rb +150 -0
- data/examples/quick_start_rubyzip.rb +115 -0
- data/examples/rubyzip_compatibility_demo.rb +194 -0
- data/exe/omnizip +27 -0
- data/lib/omnizip/algorithm.rb +130 -0
- data/lib/omnizip/algorithm_registry.rb +86 -0
- data/lib/omnizip/algorithms/.keep +0 -0
- data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
- data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
- data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
- data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
- data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
- data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
- data/lib/omnizip/algorithms/bzip2.rb +130 -0
- data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
- data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
- data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
- data/lib/omnizip/algorithms/deflate.rb +128 -0
- data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
- data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
- data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
- data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
- data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
- data/lib/omnizip/algorithms/deflate64.rb +109 -0
- data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
- data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
- data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
- data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
- data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
- data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
- data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
- data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
- data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
- data/lib/omnizip/algorithms/lzma/match.rb +32 -0
- data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
- data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
- data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
- data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
- data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
- data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
- data/lib/omnizip/algorithms/lzma/state.rb +127 -0
- data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
- data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
- data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
- data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
- data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
- data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
- data/lib/omnizip/algorithms/lzma.rb +238 -0
- data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
- data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
- data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
- data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
- data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
- data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
- data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
- data/lib/omnizip/algorithms/lzma2.rb +141 -0
- data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
- data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
- data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
- data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
- data/lib/omnizip/algorithms/ppmd7.rb +116 -0
- data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
- data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
- data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
- data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
- data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
- data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
- data/lib/omnizip/algorithms/ppmd8.rb +82 -0
- data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
- data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
- data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
- data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
- data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
- data/lib/omnizip/algorithms/zstandard.rb +138 -0
- data/lib/omnizip/buffer/memory_archive.rb +251 -0
- data/lib/omnizip/buffer/memory_extractor.rb +224 -0
- data/lib/omnizip/buffer.rb +176 -0
- data/lib/omnizip/checksum_registry.rb +114 -0
- data/lib/omnizip/checksums/crc32.rb +100 -0
- data/lib/omnizip/checksums/crc64.rb +101 -0
- data/lib/omnizip/checksums/crc_base.rb +158 -0
- data/lib/omnizip/checksums/verifier.rb +131 -0
- data/lib/omnizip/chunked/memory_manager.rb +194 -0
- data/lib/omnizip/chunked/reader.rb +78 -0
- data/lib/omnizip/chunked/writer.rb +120 -0
- data/lib/omnizip/chunked.rb +129 -0
- data/lib/omnizip/cli/output_formatter.rb +104 -0
- data/lib/omnizip/cli.rb +572 -0
- data/lib/omnizip/commands/.keep +0 -0
- data/lib/omnizip/commands/archive_create_command.rb +427 -0
- data/lib/omnizip/commands/archive_extract_command.rb +272 -0
- data/lib/omnizip/commands/archive_list_command.rb +218 -0
- data/lib/omnizip/commands/archive_repair_command.rb +131 -0
- data/lib/omnizip/commands/archive_verify_command.rb +117 -0
- data/lib/omnizip/commands/compress_command.rb +117 -0
- data/lib/omnizip/commands/decompress_command.rb +120 -0
- data/lib/omnizip/commands/list_command.rb +53 -0
- data/lib/omnizip/commands/metadata_command.rb +153 -0
- data/lib/omnizip/commands/parity_create_command.rb +122 -0
- data/lib/omnizip/commands/parity_repair_command.rb +122 -0
- data/lib/omnizip/commands/parity_verify_command.rb +124 -0
- data/lib/omnizip/commands/profile_list_command.rb +56 -0
- data/lib/omnizip/commands/profile_show_command.rb +44 -0
- data/lib/omnizip/convenience.rb +359 -0
- data/lib/omnizip/converter/conversion_registry.rb +49 -0
- data/lib/omnizip/converter/conversion_strategy.rb +121 -0
- data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
- data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
- data/lib/omnizip/converter.rb +105 -0
- data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
- data/lib/omnizip/crypto/aes256/constants.rb +28 -0
- data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
- data/lib/omnizip/crypto/aes256.rb +102 -0
- data/lib/omnizip/error.rb +106 -0
- data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
- data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
- data/lib/omnizip/eta/rate_calculator.rb +104 -0
- data/lib/omnizip/eta/sample_history.rb +143 -0
- data/lib/omnizip/eta/time_estimator.rb +106 -0
- data/lib/omnizip/eta.rb +63 -0
- data/lib/omnizip/extraction/filter_chain.rb +177 -0
- data/lib/omnizip/extraction/glob_pattern.rb +140 -0
- data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
- data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
- data/lib/omnizip/extraction/regex_pattern.rb +50 -0
- data/lib/omnizip/extraction/selective_extractor.rb +240 -0
- data/lib/omnizip/extraction.rb +111 -0
- data/lib/omnizip/file_type/mime_classifier.rb +144 -0
- data/lib/omnizip/file_type.rb +113 -0
- data/lib/omnizip/filter.rb +139 -0
- data/lib/omnizip/filter_pipeline.rb +108 -0
- data/lib/omnizip/filter_registry.rb +166 -0
- data/lib/omnizip/filters/bcj.rb +279 -0
- data/lib/omnizip/filters/bcj2/constants.rb +53 -0
- data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
- data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
- data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
- data/lib/omnizip/filters/bcj2.rb +99 -0
- data/lib/omnizip/filters/bcj_arm.rb +176 -0
- data/lib/omnizip/filters/bcj_arm64.rb +244 -0
- data/lib/omnizip/filters/bcj_ia64.rb +196 -0
- data/lib/omnizip/filters/bcj_ppc.rb +190 -0
- data/lib/omnizip/filters/bcj_sparc.rb +176 -0
- data/lib/omnizip/filters/bcj_x86.rb +193 -0
- data/lib/omnizip/filters/delta.rb +196 -0
- data/lib/omnizip/filters/filter_base.rb +72 -0
- data/lib/omnizip/filters/registry.rb +123 -0
- data/lib/omnizip/filters/xz_delta.rb +258 -0
- data/lib/omnizip/format_detector.rb +162 -0
- data/lib/omnizip/format_registry.rb +59 -0
- data/lib/omnizip/formats/.keep +0 -0
- data/lib/omnizip/formats/bzip2_file.rb +172 -0
- data/lib/omnizip/formats/cpio/constants.rb +55 -0
- data/lib/omnizip/formats/cpio/entry.rb +385 -0
- data/lib/omnizip/formats/cpio/reader.rb +196 -0
- data/lib/omnizip/formats/cpio/writer.rb +234 -0
- data/lib/omnizip/formats/cpio.rb +140 -0
- data/lib/omnizip/formats/format_spec_loader.rb +230 -0
- data/lib/omnizip/formats/gzip.rb +238 -0
- data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
- data/lib/omnizip/formats/iso/directory_record.rb +152 -0
- data/lib/omnizip/formats/iso/joliet.rb +204 -0
- data/lib/omnizip/formats/iso/path_table.rb +125 -0
- data/lib/omnizip/formats/iso/reader.rb +197 -0
- data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
- data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
- data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
- data/lib/omnizip/formats/iso/writer.rb +530 -0
- data/lib/omnizip/formats/iso.rb +140 -0
- data/lib/omnizip/formats/lzip.rb +175 -0
- data/lib/omnizip/formats/lzma_alone.rb +171 -0
- data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
- data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
- data/lib/omnizip/formats/rar/block_parser.rb +243 -0
- data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
- data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
- data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
- data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
- data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
- data/lib/omnizip/formats/rar/constants.rb +82 -0
- data/lib/omnizip/formats/rar/decompressor.rb +238 -0
- data/lib/omnizip/formats/rar/external_writer.rb +312 -0
- data/lib/omnizip/formats/rar/header.rb +192 -0
- data/lib/omnizip/formats/rar/license_validator.rb +109 -0
- data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
- data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
- data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
- data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
- data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
- data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
- data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
- data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
- data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
- data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
- data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
- data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
- data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
- data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
- data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
- data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
- data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
- data/lib/omnizip/formats/rar/reader.rb +366 -0
- data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
- data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
- data/lib/omnizip/formats/rar/writer.rb +431 -0
- data/lib/omnizip/formats/rar.rb +205 -0
- data/lib/omnizip/formats/rar3/compressor.rb +73 -0
- data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar3/reader.rb +386 -0
- data/lib/omnizip/formats/rar3/writer.rb +219 -0
- data/lib/omnizip/formats/rar5/compressor.rb +73 -0
- data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
- data/lib/omnizip/formats/rar5/reader.rb +342 -0
- data/lib/omnizip/formats/rar5/writer.rb +214 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
- data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
- data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
- data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
- data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
- data/lib/omnizip/formats/seven_zip/header.rb +106 -0
- data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
- data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
- data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
- data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
- data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
- data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
- data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
- data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
- data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
- data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
- data/lib/omnizip/formats/seven_zip.rb +93 -0
- data/lib/omnizip/formats/tar/constants.rb +73 -0
- data/lib/omnizip/formats/tar/entry.rb +94 -0
- data/lib/omnizip/formats/tar/header.rb +168 -0
- data/lib/omnizip/formats/tar/reader.rb +121 -0
- data/lib/omnizip/formats/tar/writer.rb +216 -0
- data/lib/omnizip/formats/tar.rb +84 -0
- data/lib/omnizip/formats/xz/reader.rb +116 -0
- data/lib/omnizip/formats/xz.rb +237 -0
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
- data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
- data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
- data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
- data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
- data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
- data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
- data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
- data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
- data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
- data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
- data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
- data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
- data/lib/omnizip/formats/zip/constants.rb +69 -0
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
- data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
- data/lib/omnizip/formats/zip/reader.rb +250 -0
- data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
- data/lib/omnizip/formats/zip/writer.rb +375 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
- data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
- data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
- data/lib/omnizip/formats/zip.rb +50 -0
- data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
- data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
- data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
- data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
- data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
- data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
- data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
- data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
- data/lib/omnizip/io/buffered_input.rb +146 -0
- data/lib/omnizip/io/buffered_output.rb +105 -0
- data/lib/omnizip/io/stream_manager.rb +115 -0
- data/lib/omnizip/link_handler/hard_link.rb +79 -0
- data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
- data/lib/omnizip/link_handler.rb +124 -0
- data/lib/omnizip/metadata/archive_metadata.rb +114 -0
- data/lib/omnizip/metadata/entry_metadata.rb +146 -0
- data/lib/omnizip/metadata/metadata_editor.rb +171 -0
- data/lib/omnizip/metadata/metadata_registry.rb +64 -0
- data/lib/omnizip/metadata/metadata_validator.rb +99 -0
- data/lib/omnizip/metadata.rb +57 -0
- data/lib/omnizip/models/.keep +0 -0
- data/lib/omnizip/models/algorithm_metadata.rb +73 -0
- data/lib/omnizip/models/compression_options.rb +71 -0
- data/lib/omnizip/models/conversion_options.rb +87 -0
- data/lib/omnizip/models/conversion_result.rb +135 -0
- data/lib/omnizip/models/eta_result.rb +46 -0
- data/lib/omnizip/models/extraction_rule.rb +115 -0
- data/lib/omnizip/models/filter_chain.rb +144 -0
- data/lib/omnizip/models/filter_config.rb +183 -0
- data/lib/omnizip/models/match_result.rb +124 -0
- data/lib/omnizip/models/optimization_suggestion.rb +91 -0
- data/lib/omnizip/models/parallel_options.rb +104 -0
- data/lib/omnizip/models/performance_result.rb +79 -0
- data/lib/omnizip/models/profile_report.rb +82 -0
- data/lib/omnizip/models/progress_options.rb +38 -0
- data/lib/omnizip/models/split_options.rb +116 -0
- data/lib/omnizip/optimization_registry.rb +81 -0
- data/lib/omnizip/parallel/job_queue.rb +209 -0
- data/lib/omnizip/parallel/job_scheduler.rb +203 -0
- data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
- data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
- data/lib/omnizip/parallel/worker_pool.rb +223 -0
- data/lib/omnizip/parallel.rb +149 -0
- data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
- data/lib/omnizip/parity/galois16.rb +145 -0
- data/lib/omnizip/parity/models/creator_packet.rb +73 -0
- data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
- data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
- data/lib/omnizip/parity/models/main_packet.rb +128 -0
- data/lib/omnizip/parity/models/packet.rb +156 -0
- data/lib/omnizip/parity/models/packet_registry.rb +109 -0
- data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
- data/lib/omnizip/parity/par2_creator.rb +531 -0
- data/lib/omnizip/parity/par2_repairer.rb +407 -0
- data/lib/omnizip/parity/par2_verifier.rb +364 -0
- data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
- data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
- data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
- data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
- data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
- data/lib/omnizip/parity.rb +186 -0
- data/lib/omnizip/password/encryption_registry.rb +65 -0
- data/lib/omnizip/password/encryption_strategy.rb +96 -0
- data/lib/omnizip/password/password_validator.rb +129 -0
- data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
- data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
- data/lib/omnizip/password.rb +87 -0
- data/lib/omnizip/pipe/stream_compressor.rb +124 -0
- data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
- data/lib/omnizip/pipe.rb +121 -0
- data/lib/omnizip/platform/ntfs_streams.rb +201 -0
- data/lib/omnizip/platform.rb +189 -0
- data/lib/omnizip/profile/archive_profile.rb +39 -0
- data/lib/omnizip/profile/balanced_profile.rb +33 -0
- data/lib/omnizip/profile/binary_profile.rb +36 -0
- data/lib/omnizip/profile/compression_profile.rb +158 -0
- data/lib/omnizip/profile/custom_profile.rb +157 -0
- data/lib/omnizip/profile/fast_profile.rb +33 -0
- data/lib/omnizip/profile/maximum_profile.rb +33 -0
- data/lib/omnizip/profile/profile_detector.rb +110 -0
- data/lib/omnizip/profile/profile_registry.rb +161 -0
- data/lib/omnizip/profile/text_profile.rb +36 -0
- data/lib/omnizip/profile.rb +190 -0
- data/lib/omnizip/profiler/memory_profiler.rb +66 -0
- data/lib/omnizip/profiler/method_profiler.rb +49 -0
- data/lib/omnizip/profiler/report_generator.rb +169 -0
- data/lib/omnizip/profiler.rb +204 -0
- data/lib/omnizip/progress/callback_reporter.rb +36 -0
- data/lib/omnizip/progress/console_reporter.rb +62 -0
- data/lib/omnizip/progress/log_reporter.rb +91 -0
- data/lib/omnizip/progress/operation_progress.rb +118 -0
- data/lib/omnizip/progress/progress_bar.rb +156 -0
- data/lib/omnizip/progress/progress_reporter.rb +40 -0
- data/lib/omnizip/progress/progress_tracker.rb +190 -0
- data/lib/omnizip/progress/silent_reporter.rb +24 -0
- data/lib/omnizip/progress.rb +127 -0
- data/lib/omnizip/rubyzip_compat.rb +63 -0
- data/lib/omnizip/temp/safe_extract.rb +168 -0
- data/lib/omnizip/temp/temp_file.rb +124 -0
- data/lib/omnizip/temp/temp_file_pool.rb +109 -0
- data/lib/omnizip/temp.rb +181 -0
- data/lib/omnizip/version.rb +5 -0
- data/lib/omnizip/zip/entry.rb +156 -0
- data/lib/omnizip/zip/file.rb +485 -0
- data/lib/omnizip/zip/input_stream.rb +273 -0
- data/lib/omnizip/zip/output_stream.rb +324 -0
- data/lib/omnizip.rb +156 -0
- data/readme-docs/advanced-features.adoc +515 -0
- data/readme-docs/api-usage.adoc +444 -0
- data/readme-docs/architecture.adoc +449 -0
- data/readme-docs/archive-formats.adoc +479 -0
- data/readme-docs/cli-usage.adoc +222 -0
- data/readme-docs/compression-algorithms.adoc +442 -0
- data/readme-docs/compression-profiles.adoc +247 -0
- data/readme-docs/encryption-checksums.adoc +328 -0
- data/readme-docs/format-converter.adoc +325 -0
- data/readme-docs/installation.adoc +228 -0
- data/readme-docs/par2-archives.adoc +608 -0
- data/readme-docs/performance-profiler.adoc +389 -0
- data/readme-docs/preprocessing-filters.adoc +280 -0
- data/xz-file-format-1.2.1.txt +1174 -0
- metadata +617 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../algorithm"
|
|
24
|
+
require_relative "../models/algorithm_metadata"
|
|
25
|
+
|
|
26
|
+
module Omnizip
|
|
27
|
+
module Algorithms
|
|
28
|
+
# LZMA (Lempel-Ziv-Markov chain Algorithm) compression
|
|
29
|
+
#
|
|
30
|
+
# LZMA is a lossless data compression algorithm that combines
|
|
31
|
+
# Lempel-Ziv dictionary compression with range coding (a form
|
|
32
|
+
# of arithmetic coding). It achieves high compression ratios
|
|
33
|
+
# by using adaptive probability models.
|
|
34
|
+
#
|
|
35
|
+
# This implementation uses:
|
|
36
|
+
# - LZ77 match finder for finding duplicate sequences
|
|
37
|
+
# - Range coding for probability-based encoding
|
|
38
|
+
# - Adaptive bit models that adjust based on input data
|
|
39
|
+
# - State machine for compression context tracking
|
|
40
|
+
#
|
|
41
|
+
# The algorithm operates by:
|
|
42
|
+
# 1. Finding matches using LZ77 dictionary compression
|
|
43
|
+
# 2. Encoding decisions using range coder with probability models
|
|
44
|
+
# 3. Maintaining state for optimal compression
|
|
45
|
+
class LZMA < Algorithm
|
|
46
|
+
# Initialize the LZMA algorithm with options
|
|
47
|
+
#
|
|
48
|
+
# @param options [Hash] Algorithm options
|
|
49
|
+
# @option options [Integer] :lc Literal context bits (default: 3)
|
|
50
|
+
# @option options [Integer] :lp Literal position bits (default: 0)
|
|
51
|
+
# @option options [Integer] :pb Position bits (default: 2)
|
|
52
|
+
# @option options [Integer] :dict_size Dictionary size (default: 4MB)
|
|
53
|
+
# @option options [Boolean] :lzma2_mode Raw LZMA mode (no header, for 7-Zip)
|
|
54
|
+
def initialize(options = {})
  super()

  # Tolerate an explicit nil: compress/decompress in this class also accept
  # nil options, and indexing nil would otherwise raise NoMethodError.
  options ||= {}

  # Coder parameters. A missing (or nil) entry falls back to the default:
  #   :lc        literal context bits   (default 3)
  #   :lp        literal position bits  (default 0)
  #   :pb        position bits          (default 2)
  #   :dict_size dictionary size, bytes (default 4 MB)
  @lc = options[:lc] || 3
  @lp = options[:lp] || 0
  @pb = options[:pb] || 2
  @dict_size = options[:dict_size] || (4 * 1024 * 1024)

  # No default here: stays nil unless the caller supplied :lzma2_mode.
  @lzma2_mode = options[:lzma2_mode]

  # :uncompressed_size takes precedence; :size is accepted as an
  # alternative key. Remains nil when neither is given.
  @uncompressed_size = options[:uncompressed_size] || options[:size]
end
|
|
63
|
+
|
|
64
|
+
# Get algorithm metadata
|
|
65
|
+
#
|
|
66
|
+
# @return [AlgorithmMetadata] Algorithm information
|
|
67
|
+
def self.metadata
  # Build a fresh metadata record on every call and populate it field by
  # field; the populated record is the return value.
  info = Models::AlgorithmMetadata.new
  info.name = "lzma"
  info.description = "LZMA compression using range coding " \
                     "and dictionary compression"
  info.version = "1.0.0"
  info
end
|
|
75
|
+
|
|
76
|
+
# Compress data using LZMA algorithm
|
|
77
|
+
#
|
|
78
|
+
# @param input_stream [IO] Input stream to compress
|
|
79
|
+
# @param output_stream [IO] Output stream for compressed data
|
|
80
|
+
# @param options [Models::CompressionOptions] Compression options
|
|
81
|
+
# @return [void]
|
|
82
|
+
def compress(input_stream, output_stream, options = nil)
  # The whole input is read first; the encoder is built afterwards from the
  # translated options and writes to output_stream.
  payload = input_stream.read
  Encoder.new(output_stream, build_encoder_options(options)).encode_stream(payload)
end
|
|
87
|
+
|
|
88
|
+
# Decompress LZMA-compressed data
|
|
89
|
+
#
|
|
90
|
+
# @param input_stream [IO] Input stream of compressed data
|
|
91
|
+
# @param output_stream [IO] Output stream for decompressed data
|
|
92
|
+
# @param options [Models::CompressionOptions, Hash] Decompression options
|
|
93
|
+
# @return [IO] The output_stream (for chaining)
|
|
94
|
+
def decompress(input_stream, output_stream, options = nil)
  # Decoded bytes are raw binary; keep the stream from applying a text encoding.
  output_stream.set_encoding(Encoding::BINARY) if output_stream.respond_to?(:set_encoding)

  decoder_opts = build_decoder_options(options)

  # A per-call :size is an alias for :uncompressed_size. It is resolved before
  # the instance fallbacks so that, like every other per-call option, it takes
  # priority over the value captured by the constructor.
  # NOTE(review): an Integer passed as `options` contributes nothing here;
  # confirm whether it should be treated as the uncompressed size.
  call_size = options[:size] if options.respond_to?(:key?) && options.key?(:size)
  decoder_opts[:uncompressed_size] ||= call_size if call_size

  # Constructor settings only fill keys the caller left out, and unset
  # (nil/false) settings are never injected.
  {
    lzma2_mode: @lzma2_mode,
    lc: @lc,
    lp: @lp,
    pb: @pb,
    dict_size: @dict_size,
    uncompressed_size: @uncompressed_size,
  }.each do |key, fallback|
    decoder_opts[key] = fallback if fallback && !decoder_opts.key?(key)
  end

  Decoder.new(input_stream, decoder_opts).decode_stream(output_stream)
  output_stream
end
|
|
112
|
+
|
|
113
|
+
private
|
|
114
|
+
|
|
115
|
+
# Build encoder options from compression options
|
|
116
|
+
#
|
|
117
|
+
# @param options [Models::CompressionOptions, Hash, nil] Compression opts
|
|
118
|
+
# @return [Hash] Encoder options
|
|
119
|
+
def build_encoder_options(options)
  return {} if options.nil?

  opts = {}

  if options.respond_to?(:[])
    # Tuning values are copied only when set; nil/false means "not given".
    %i[lc lp pb dict_size].each do |key|
      value = options[key]
      opts[key] = value if value
    end

    # Flags are copied whenever the key is present, so an explicit false or
    # nil still reaches the encoder. #key? is checked separately because an
    # object can offer #[] without #key? (a Struct, for example).
    if options.respond_to?(:key?)
      %i[write_size sdk_compatible xz_compatible raw_mode].each do |key|
        opts[key] = options[key] if options.key?(key)
      end
    end
  end

  # The level comes from a #level reader when there is one, otherwise from
  # the :level entry; 5 is used when neither yields a value.
  level =
    if options.respond_to?(:level)
      options.level
    elsif options.respond_to?(:[])
      options[:level]
    end

  # The level only matters when no dictionary size was given explicitly.
  opts[:dict_size] ||= dictionary_size_for_level(level || 5)

  opts
end
|
|
155
|
+
|
|
156
|
+
# Build decoder options from decompression options
|
|
157
|
+
#
|
|
158
|
+
# @param options [Models::CompressionOptions, Hash, nil] Decompression opts
|
|
159
|
+
# @return [Hash] Decoder options
|
|
160
|
+
def build_decoder_options(options)
  # nil, a bare Integer and any other object without #key? carry no named
  # settings, so they all produce an empty option set.
  # NOTE(review): an Integer here is described as an uncompressed size but is
  # discarded; confirm that this is intended.
  return {} unless options.respond_to?(:key?)

  # Copy each decoder-relevant key that is present, even when its value is
  # nil or false, so callers can tell "explicitly given" from "absent".
  %i[sdk_compatible lzma2_mode lc lp pb dict_size uncompressed_size size]
    .each_with_object({}) do |key, opts|
      opts[key] = options[key] if options.key?(key)
    end
end
|
|
182
|
+
|
|
183
|
+
# Get dictionary size based on compression level
|
|
184
|
+
#
|
|
185
|
+
# @param level [Integer] Compression level (0-9)
|
|
186
|
+
# @return [Integer] Dictionary size in bytes
|
|
187
|
+
def dictionary_size_for_level(level)
  # Level band => log2 of the dictionary size.
  tiers = {
    (0..1) => 16, # 64KB
    (2..3) => 20, # 1MB
    (4..5) => 22, # 4MB
    (6..7) => 23, # 8MB
  }

  # Anything outside the listed bands (8, 9, nil, negatives, ...) gets 16MB.
  matched = tiers.find { |band, _bits| band.cover?(level) }
  bits = matched ? matched.last : 24

  1 << bits
end
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
# Load nested classes after LZMA class is defined
|
|
201
|
+
require_relative "lzma/constants"
|
|
202
|
+
require_relative "lzma/bit_model"
|
|
203
|
+
require_relative "lzma/probability_models"
|
|
204
|
+
require_relative "lzma/xz_range_encoder"
|
|
205
|
+
require_relative "lzma/dictionary"
|
|
206
|
+
require_relative "lzma/lzma_state"
|
|
207
|
+
require_relative "lzma/range_coder"
|
|
208
|
+
require_relative "lzma/range_encoder"
|
|
209
|
+
require_relative "lzma/range_decoder"
|
|
210
|
+
require_relative "lzma/match"
|
|
211
|
+
require_relative "lzma/match_finder"
|
|
212
|
+
require_relative "lzma/optimal_encoder"
|
|
213
|
+
require_relative "lzma/state"
|
|
214
|
+
require_relative "lzma/xz_state"
|
|
215
|
+
require_relative "lzma/xz_probability_models"
|
|
216
|
+
require_relative "lzma/xz_price_calculator"
|
|
217
|
+
require_relative "lzma/xz_match_finder_adapter"
|
|
218
|
+
require_relative "../implementations/seven_zip/lzma/state_machine"
|
|
219
|
+
require_relative "lzma/length_coder"
|
|
220
|
+
require_relative "lzma/distance_coder"
|
|
221
|
+
require_relative "lzma/literal_encoder"
|
|
222
|
+
require_relative "lzma/literal_decoder"
|
|
223
|
+
require_relative "lzma/match_finder_config"
|
|
224
|
+
require_relative "../implementations/seven_zip/lzma/match_finder"
|
|
225
|
+
require_relative "lzma/match_finder_factory"
|
|
226
|
+
require_relative "../implementations/seven_zip/lzma/encoder"
|
|
227
|
+
require_relative "lzma/xz_encoder"
|
|
228
|
+
require_relative "lzma/encoder"
|
|
229
|
+
require_relative "lzma/decoder"
|
|
230
|
+
require_relative "lzma/xz_utils_decoder"
|
|
231
|
+
|
|
232
|
+
# LZMA container format decoders (DIFFERENT from XZ format!)
|
|
233
|
+
# These are standalone formats that use LZMA1 compression
|
|
234
|
+
require_relative "lzma/lzma_alone_decoder" # .lzma (LZMA_Alone) format
|
|
235
|
+
require_relative "lzma/lzip_decoder" # .lz (lzip) format
|
|
236
|
+
|
|
237
|
+
# Register the LZMA algorithm
|
|
238
|
+
# Executed at load time, after the lzma/* files required above, and registers
# the class under the :lzma key.
Omnizip::AlgorithmRegistry.register(:lzma, Omnizip::Algorithms::LZMA)
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "constants"
|
|
24
|
+
|
|
25
|
+
module Omnizip
|
|
26
|
+
module Algorithms
|
|
27
|
+
class LZMA2
|
|
28
|
+
# LZMA2 Chunk Manager - handles chunk boundaries and decisions
|
|
29
|
+
#
|
|
30
|
+
# This class is responsible for:
|
|
31
|
+
# - Splitting data into chunks of appropriate size
|
|
32
|
+
# - Deciding whether to compress or store each chunk uncompressed
|
|
33
|
+
# - Managing chunk buffering
|
|
34
|
+
# - Providing thread safety infrastructure (for future use)
|
|
35
|
+
#
|
|
36
|
+
# The chunk manager implements intelligent compression decisions
|
|
37
|
+
# based on compression ratio thresholds.
|
|
38
|
+
#
|
|
39
|
+
# IMPORTANT: LZMA2 format limits uncompressed chunk size to 65536 bytes
|
|
40
|
+
# due to 2-byte size encoding. This manager enforces that limit.
|
|
41
|
+
class ChunkManager
|
|
42
|
+
include Constants
|
|
43
|
+
|
|
44
|
+
attr_reader :chunk_size
|
|
45
|
+
|
|
46
|
+
# Maximum bytes per chunk (LZMA2 spec limit)
|
|
47
|
+
MAX_CHUNK_BYTES = UNCOMPRESSED_SIZE_MAX + 1 # 65536 bytes
|
|
48
|
+
|
|
49
|
+
# Chunk data model
|
|
50
|
+
#
|
|
51
|
+
# Represents a single chunk of data with its metadata
|
|
52
|
+
class Chunk
  attr_reader :data, :compressed_data, :is_compressed

  # Start out as a stored (uncompressed) chunk.
  #
  # @param data [String] Uncompressed chunk data
  def initialize(data)
    @data = data
    @compressed_data = nil
    @is_compressed = false
  end

  # Attach (or clear) the compressed form of this chunk.
  #
  # Assigning a String marks the chunk as compressed. Assigning nil clears
  # the compressed form and returns the chunk to stored mode, so the output
  # helpers never try to measure a nil payload.
  #
  # @param compressed [String, nil] Compressed data, or nil to clear it
  # @return [void]
  def compressed_data=(compressed)
    @compressed_data = compressed
    @is_compressed = !compressed.nil?
  end

  # Payload to write: the compressed bytes when present, else the raw data.
  #
  # @return [String] Data to write
  def output_data
    @is_compressed ? @compressed_data : @data
  end

  # Byte length of the payload returned by #output_data.
  #
  # @return [Integer] Size in bytes
  def output_size
    output_data.bytesize
  end

  # Byte length of the original, uncompressed data.
  #
  # @return [Integer] Size in bytes
  def uncompressed_size
    @data.bytesize
  end
end
|
|
94
|
+
|
|
95
|
+
# Initialize chunk manager
|
|
96
|
+
#
|
|
97
|
+
# @param chunk_size [Integer] Desired chunk size
|
|
98
|
+
def initialize(chunk_size = CHUNK_SIZE_DEFAULT)
  requested = validate_chunk_size(chunk_size)
  @chunk_size = requested
  # The size actually used for splitting never exceeds MAX_CHUNK_BYTES.
  @effective_chunk_size = requested > MAX_CHUNK_BYTES ? MAX_CHUNK_BYTES : requested
end
|
|
103
|
+
|
|
104
|
+
# Split data into chunks
|
|
105
|
+
#
|
|
106
|
+
# @param data [String] Data to split
|
|
107
|
+
# @return [Array<Chunk>] Array of chunks
|
|
108
|
+
def create_chunks(data)
  # Visit the byte offsets 0, size, 2*size, ... below data.bytesize and cut
  # one slice per offset; the last slice may be shorter. Empty data has no
  # offsets, so the result is an empty array. Slicing is by bytes, not
  # characters.
  (0...data.bytesize).step(@effective_chunk_size).map do |offset|
    Chunk.new(data.byteslice(offset, @effective_chunk_size))
  end
end
|
|
120
|
+
|
|
121
|
+
# Decide if chunk should be compressed
|
|
122
|
+
#
|
|
123
|
+
# Makes decision based on compression ratio threshold.
|
|
124
|
+
# If compressed size is not significantly smaller than
|
|
125
|
+
# uncompressed size, store uncompressed.
|
|
126
|
+
#
|
|
127
|
+
# @param chunk [Chunk] Chunk with compressed data set
|
|
128
|
+
# @return [Boolean] True if should use compression
|
|
129
|
+
def should_compress?(chunk)
  if chunk.compressed_data
    # Output bytes per input byte; only a ratio strictly below the threshold
    # counts as worth keeping.
    achieved = chunk.output_size.to_f / chunk.uncompressed_size
    achieved < COMPRESSION_THRESHOLD
  else
    # Nothing compressed to compare against.
    false
  end
end
|
|
138
|
+
|
|
139
|
+
# Decide if chunk is last chunk
|
|
140
|
+
#
|
|
141
|
+
# @param chunk_index [Integer] Current chunk index
|
|
142
|
+
# @param total_chunks [Integer] Total number of chunks
|
|
143
|
+
# @return [Boolean] True if last chunk
|
|
144
|
+
def last_chunk?(chunk_index, total_chunks)
  # Indices are zero-based, so the final chunk sits at total_chunks - 1.
  final_index = total_chunks - 1
  final_index == chunk_index
end
|
|
147
|
+
|
|
148
|
+
# Calculate optimal chunk size for data
|
|
149
|
+
#
|
|
150
|
+
# This method can be used to dynamically adjust chunk size
|
|
151
|
+
# based on data characteristics (future enhancement).
|
|
152
|
+
#
|
|
153
|
+
# @param data_size [Integer] Total data size
|
|
154
|
+
# @return [Integer] Optimal chunk size
|
|
155
|
+
def optimal_chunk_size(data_size)
  doubled = @effective_chunk_size * 2

  if data_size <= doubled
    # Small inputs keep the configured effective size.
    @effective_chunk_size
  else
    # Larger inputs get twice the effective size, capped at MAX_CHUNK_BYTES.
    doubled < MAX_CHUNK_BYTES ? doubled : MAX_CHUNK_BYTES
  end
end
|
|
163
|
+
|
|
164
|
+
private
|
|
165
|
+
|
|
166
|
+
# Validate chunk size
|
|
167
|
+
#
|
|
168
|
+
# @param size [Integer] Chunk size to validate
|
|
169
|
+
# @return [Integer] Validated size
|
|
170
|
+
# @raise [ArgumentError] If size is invalid
|
|
171
|
+
def validate_chunk_size(size)
  # Non-numeric input (nil, a String, ...) is rejected with the same
  # ArgumentError as an out-of-range number, instead of failing inside
  # #between? with a NoMethodError or a comparison error.
  in_range = size.is_a?(Numeric) && size.between?(CHUNK_SIZE_MIN, CHUNK_SIZE_MAX)
  return size if in_range

  raise ArgumentError,
        "Chunk size must be between #{CHUNK_SIZE_MIN} " \
        "and #{CHUNK_SIZE_MAX}"
end
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
end
|
|
182
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Omnizip
|
|
4
|
+
module Algorithms
|
|
5
|
+
# LZMA2 Format Constants (from XZ Utils specification)
|
|
6
|
+
module LZMA2Const
  # Largest compressed payload a single chunk may carry (headers excluded).
  CHUNK_MAX = 1 << 16 # 64 KiB

  # Largest uncompressed payload per chunk: the 16-bit header field stores
  # size - 1, so the ceiling is 0xFFFF + 1.
  UNCOMPRESSED_MAX = 0xFFFF + 1 # 64 KiB

  # Maximum size of an LZMA2 chunk header.
  HEADER_MAX = 6

  # Size of the header that precedes an uncompressed chunk.
  HEADER_UNCOMPRESSED = 3

  # Control byte values.
  CONTROL_END = 0x00                # End of stream marker
  CONTROL_UNCOMPRESSED_RESET = 0x01 # Uncompressed chunk, dictionary reset
  CONTROL_UNCOMPRESSED = 0x02       # Uncompressed chunk, no reset
  CONTROL_LZMA_MIN = 0x80           # Smallest control byte of an LZMA chunk

  # Control byte flags (LZMA chunks only).
  FLAG_UNCOMPRESSED_SIZE = 0x80 # Base flag for LZMA chunks
  FLAG_RESET_STATE = 0x20       # Reset LZMA state
  FLAG_RESET_PROPERTIES = 0x40  # Reset properties + state
  FLAG_RESET_DICT = 0x60        # Reset dict + properties + state

  # Dictionary size bounds.
  DICT_SIZE_MIN = 1 << 12       # 4 KiB minimum
  DICT_SIZE_MAX = (1 << 32) - 1 # 4 GiB - 1 maximum (0xFFFFFFFF)
end
|
|
37
|
+
|
|
38
|
+
# Alias for backward compatibility
|
|
39
|
+
# Both names refer to the same module object, so references such as
# LZMA2Constants::DICT_SIZE_MIN keep resolving.
LZMA2Constants = LZMA2Const
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "constants"
|
|
4
|
+
require_relative "properties"
|
|
5
|
+
require_relative "simple_lzma2_encoder"
|
|
6
|
+
|
|
7
|
+
module Omnizip
|
|
8
|
+
module Algorithms
|
|
9
|
+
# LZMA2 encoder - delegates to LZMA2::SimpleLZMA2Encoder
|
|
10
|
+
#
|
|
11
|
+
# This class provides a backward-compatible API that delegates to the
|
|
12
|
+
# LZMA2::SimpleLZMA2Encoder implementation (which uses XzEncoder internally).
|
|
13
|
+
#
|
|
14
|
+
# Based on XZ Utils lzma2_encoder.c
|
|
15
|
+
class LZMA2Encoder
|
|
16
|
+
attr_reader :dict_size, :lc, :lp, :pb
|
|
17
|
+
|
|
18
|
+
# Initialize the encoder
|
|
19
|
+
#
|
|
20
|
+
# @param dict_size [Integer] Dictionary size (default: 8MB)
|
|
21
|
+
# @param lc [Integer] Literal context bits (default: 3)
|
|
22
|
+
# @param lp [Integer] Literal position bits (default: 0)
|
|
23
|
+
# @param pb [Integer] Position bits (default: 2)
|
|
24
|
+
# @param standalone [Boolean] If true, write property byte for
|
|
25
|
+
# standalone LZMA2 files (default: false)
|
|
26
|
+
def initialize(
  dict_size: 8 * 1024 * 1024,
  lc: 3,
  lp: 0,
  pb: 2,
  standalone: false,
  **
)
  # Keep the settings readable on this object. Unknown keywords are accepted
  # by the signature and discarded.
  @dict_size, @lc, @lp, @pb, @standalone = dict_size, lc, lp, pb, standalone

  # All encoding work is handed to SimpleLZMA2Encoder, configured with the
  # same five settings.
  settings = { dict_size: dict_size, lc: lc, lp: lp, pb: pb, standalone: standalone }
  @encoder = LZMA2::SimpleLZMA2Encoder.new(**settings)
end
|
|
49
|
+
|
|
50
|
+
# Encode data into LZMA2 format
|
|
51
|
+
#
|
|
52
|
+
# @param input [String] Input data to compress
|
|
53
|
+
# @return [String] LZMA2 compressed data
|
|
54
|
+
def encode(input)
  # Pure delegation: the wrapped encoder produces the LZMA2 bytes.
  lzma2_bytes = @encoder.encode(input)
  lzma2_bytes
end
|
|
57
|
+
|
|
58
|
+
# Compress data from input stream to output stream
|
|
59
|
+
# This method provides compatibility with the AlgorithmRegistry interface
|
|
60
|
+
#
|
|
61
|
+
# @param input_io [IO] Input stream to read from
|
|
62
|
+
# @param output_io [IO] Output stream to write to
|
|
63
|
+
# @param level [Integer] Compression level (not used, kept for compatibility)
|
|
64
|
+
# @return [Integer] Number of bytes written
|
|
65
|
+
def compress(input_io, output_io, _level = nil)
  # Read everything, encode it in one go, write the result and report the
  # number of bytes produced. The third argument is accepted but unused.
  packed = encode(input_io.read)
  output_io.write(packed)
  packed.bytesize
end
|
|
71
|
+
|
|
72
|
+
# Decompress data from input stream to output stream
|
|
73
|
+
# This method provides compatibility with the AlgorithmRegistry interface
|
|
74
|
+
#
|
|
75
|
+
# @param input_io [IO] Input stream to read from
|
|
76
|
+
# @param output_io [IO] Output stream to write to
|
|
77
|
+
# @param size [Integer] Expected uncompressed size (optional)
|
|
78
|
+
# @return [Integer] Number of bytes written
|
|
79
|
+
def decompress(input_io, output_io, _size = nil)
  # A StringIO input is taken to mean a raw LZMA2 stream (7-Zip style, no
  # property byte). The defined? guard avoids a NameError when the stringio
  # library has not been loaded, in which case the input cannot be one.
  raw_mode = defined?(StringIO) ? input_io.is_a?(StringIO) : false

  # Build the decoder exactly once. A raw stream carries no dictionary size
  # of its own, so this encoder's configured size is supplied.
  decoder_options = { raw_mode: raw_mode }
  decoder_options[:dict_size] = @dict_size if raw_mode
  decoder = LZMA2::Decoder.new(input_io, **decoder_options)

  result = decoder.decode_stream
  output_io.write(result)

  result.bytesize
end
|
|
106
|
+
|
|
107
|
+
# Encode dictionary size for LZMA2 properties
|
|
108
|
+
# Returns a single byte encoding the dictionary size
|
|
109
|
+
#
|
|
110
|
+
# @param dict_size [Integer] Dictionary size to encode
|
|
111
|
+
# @return [Integer] Encoded dictionary size byte
|
|
112
|
+
def self.encode_dict_size(dict_size)
  # LZMA2 dictionary size byte (XZ format). Code d declares:
  #   d even, d < 40: 2^(d/2 + 12)
  #   d odd,  d < 40: 3 * 2^((d - 1)/2 + 11)
  #   d == 40:        0xFFFFFFFF (4 GiB - 1)
  # Both forms for d < 40 equal (2 | (d & 1)) << (d / 2 + 11).
  #
  # Sizes below the format minimum are raised to it.
  target = [dict_size, LZMA2Constants::DICT_SIZE_MIN].max

  # Choose the smallest code whose declared size covers the request, so the
  # declared dictionary is never smaller than the one actually used. Requests
  # larger than code 39 (3 GiB) map to 40.
  (0...40).find { |code| ((2 | (code & 1)) << ((code / 2) + 11)) >= target } || 40
end
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
end
|