omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,442 @@
1
+ = Compression Algorithms Guide
2
+ :toc:
3
+ :toclevels: 3
4
+
5
+ == Purpose
6
+
7
+ This document provides detailed information about all compression algorithms supported by Omnizip, including their characteristics, use cases, and performance considerations.
8
+
9
+ == Supported Algorithms Overview
10
+
11
+ [cols="20,15,15,50",options="header"]
12
+ |===
13
+ |Algorithm |ID |Type |Description
14
+
15
+ |LZMA |0x030101 |Dictionary |High compression, range coding
16
+ |LZMA2 |0x21 |Dictionary |Enhanced LZMA with better streaming
17
+ |PPMd7 |0x030401 |Statistical |Prediction by Partial Matching v7
18
+ |PPMd8 |0x030402 |Statistical |Prediction by Partial Matching, variant I (v8)
19
+ |BZip2 |0x040202 |BWT |Burrows-Wheeler Transform
20
+ |Deflate |0x040108 |LZ77 |ZIP-compatible (Zlib wrapper)
21
+ |Deflate64 |9 (ZIP) |LZ77 |Enhanced Deflate with 64KB window
22
+ |Zstandard |0x04F71101 |LZ77 |Fast modern compression
23
+ |Copy |0x00 |None |Uncompressed storage
24
+ |===
25
+
26
+ == LZMA/LZMA2
27
+
28
+ === General
29
+
30
+ LZMA (Lempel-Ziv-Markov chain Algorithm) is a lossless data compression algorithm that achieves high compression ratios through dictionary-based compression combined with range coding. The implementation uses adaptive probability models that adjust based on the input data.
31
+
32
+ The LZMA algorithm operates in three main stages:
33
+
34
+ . Dictionary-based compression using LZ77 match finding
35
+ . Range encoding with adaptive bit models
36
+ . State machine for context tracking
37
+
38
+ LZMA2 extends LZMA with:
39
+
40
+ * Better support for uncompressible data
41
+ * Improved parallelization potential
42
+ * More efficient handling of small chunks
43
+ * Reset capability for streaming
44
+
45
+ === Characteristics
46
+
47
+ **Compression Ratio:** ⭐⭐⭐⭐⭐ (Excellent)
48
+
49
+ **Speed:** ⭐⭐ (Slow)
50
+
51
+ **Memory Usage:** ⭐⭐⭐ (Moderate to High)
52
+
53
+ **Best For:**
54
+
55
+ * Archival storage where size matters most
56
+ * Software distribution packages
57
+ * Documents and text files
58
+ * Any data where maximum compression is priority
59
+
60
+ **Not Ideal For:**
61
+
62
+ * Real-time compression needs
63
+ * Streaming data that changes frequently
64
+ * Already compressed data (images, videos)
65
+
66
+ === Compression Levels
67
+
68
+ LZMA/LZMA2 support compression levels from 1 to 9:
69
+
70
+ * **Level 1:** Fastest compression, 64KB dictionary, ~3-5x compression
71
+ * **Level 2-3:** Fast compression, 1MB dictionary, ~4-7x compression
72
+ * **Level 4-5:** Balanced (default 5), 4MB dictionary, ~6-10x compression
73
+ * **Level 6-7:** Higher compression, 8MB dictionary, ~8-12x compression
74
+ * **Level 8-9:** Maximum compression, 16MB dictionary, ~10-15x compression
75
+
76
+ === Usage Example
77
+
78
+ [source,ruby]
79
+ ----
80
+ # LZMA compression
81
+ lzma = Omnizip::AlgorithmRegistry.get(:lzma).new(level: 9)
82
+ lzma.compress(input, output)
83
+
84
+ # LZMA2 compression (recommended for new archives)
85
+ lzma2 = Omnizip::AlgorithmRegistry.get(:lzma2).new(level: 7)
86
+ lzma2.compress(input, output)
87
+ ----
88
+
89
+ == BZip2
90
+
91
+ === General
92
+
93
+ BZip2 uses the Burrows-Wheeler Transform (BWT) for block-sorting compression, achieving good compression ratios with moderate speed. It's particularly effective for text data.
94
+
95
+ The algorithm operates in stages:
96
+
97
+ . Run-Length Encoding (RLE) preprocessing
98
+ . Burrows-Wheeler Transform
99
+ . Move-to-Front transform
100
+ . Run-Length Encoding (second pass)
101
+ . Huffman coding
102
+
103
+ === Characteristics
104
+
105
+ **Compression Ratio:** ⭐⭐⭐⭐ (Very Good)
106
+
107
+ **Speed:** ⭐⭐⭐ (Moderate)
108
+
109
+ **Memory Usage:** ⭐⭐⭐⭐ (Low to Moderate)
110
+
111
+ **Best For:**
112
+
113
+ * Text files and source code
114
+ * Log files
115
+ * Data with repetitive patterns
116
+ * When LZMA is too slow but good compression is needed
117
+
118
+ **Not Ideal For:**
119
+
120
+ * Binary data with low redundancy
121
+ * Multimedia files
122
+ * When maximum compression is required
123
+
124
+ === Compression Levels
125
+
126
+ BZip2 supports levels 1-9:
127
+
128
+ * **Level 1:** 100KB blocks, fastest
129
+ * **Level 5:** 500KB blocks, default
130
+ * **Level 9:** 900KB blocks, best compression
131
+
132
+ Block size directly affects memory usage and compression ratio.
133
+
134
+ === Usage Example
135
+
136
+ [source,ruby]
137
+ ----
138
+ bzip2 = Omnizip::AlgorithmRegistry.get(:bzip2).new(level: 9)
139
+ bzip2.compress(input, output)
140
+ ----
141
+
142
+ == PPMd7 and PPMd8
143
+
144
+ === General
145
+
146
+ PPMd (Prediction by Partial Matching) excels at text compression using statistical modeling. It maintains context models that predict the probability of each symbol based on preceding symbols.
147
+
148
+ Two variants are supported:
149
+
150
+ * **PPMd7:** Original variant (variant H), excellent for general text
151
+ * **PPMd8:** Variant I with improved handling of binary data
152
+
153
+ === Characteristics
154
+
155
+ **Compression Ratio:** ⭐⭐⭐⭐⭐ (Excellent for text)
156
+
157
+ **Speed:** ⭐⭐ (Slow)
158
+
159
+ **Memory Usage:** ⭐⭐ (High, configurable)
160
+
161
+ **Best For:**
162
+
163
+ * Plain text files
164
+ * Source code
165
+ * XML/JSON/YAML files
166
+ * Natural language text
167
+ * Any highly structured text data
168
+
169
+ **Not Ideal For:**
170
+
171
+ * Binary executable files
172
+ * Multimedia files
173
+ * Low-redundancy data
174
+ * Limited memory environments
175
+
176
+ === Configuration
177
+
178
+ PPMd requires two key parameters:
179
+
180
+ * `mem_size`: Memory size expressed as a power-of-two exponent (e.g., 24 means 2^24 bytes = 16MB)
181
+ * `order`: Context order (typically 4-8)
182
+
183
+ Higher memory and order values give better compression, at the cost of slower operation.
184
+
185
+ === Usage Example
186
+
187
+ [source,ruby]
188
+ ----
189
+ # PPMd7 for general text
190
+ ppmd7 = Omnizip::AlgorithmRegistry.get(:ppmd7).new(
191
+ mem_size: 24, # 16MB
192
+ order: 6
193
+ )
194
+ ppmd7.compress(input, output)
195
+
196
+ # PPMd8 for mixed content
197
+ ppmd8 = Omnizip::AlgorithmRegistry.get(:ppmd8).new(
198
+ mem_size: 26, # 64MB
199
+ order: 8
200
+ )
201
+ ppmd8.compress(input, output)
202
+ ----
203
+
204
+ == Deflate
205
+
206
+ === General
207
+
208
+ Deflate provides ZIP-compatible compression using LZ77 sliding window with Huffman coding. It's the standard compression algorithm for ZIP files and uses the native Zlib library.
209
+
210
+ === Characteristics
211
+
212
+ **Compression Ratio:** ⭐⭐⭐ (Good)
213
+
214
+ **Speed:** ⭐⭐⭐⭐ (Fast)
215
+
216
+ **Memory Usage:** ⭐⭐⭐⭐⭐ (Low)
217
+
218
+ **Best For:**
219
+
220
+ * ZIP file creation
221
+ * Fast compression needs
222
+ * Web content (gzip)
223
+ * Limited resource environments
224
+ * When compatibility is important
225
+
226
+ **Not Ideal For:**
227
+
228
+ * Maximum compression requirements
229
+ * When speed is not critical
230
+
231
+ === Compression Levels
232
+
233
+ Deflate supports levels 1-9:
234
+
235
+ * **Level 1:** Fastest, minimal compression
236
+ * **Level 6:** Default, balanced
237
+ * **Level 9:** Best compression, slower
238
+
239
+ === Usage Example
240
+
241
+ [source,ruby]
242
+ ----
243
+ deflate = Omnizip::AlgorithmRegistry.get(:deflate).new(level: 6)
244
+ deflate.compress(input, output)
245
+ ----
246
+
247
+ == Deflate64
248
+
249
+ === General
250
+
251
+ Deflate64 (Enhanced Deflate) extends standard Deflate with a 64KB sliding window (versus 32KB in standard Deflate), providing better compression ratios for larger files while maintaining ZIP format compatibility as compression method 9.
252
+
253
+ The algorithm operates in stages:
254
+
255
+ . LZ77 match finding with 64KB dictionary
256
+ . Huffman coding with dynamic trees
257
+ . Bitstream encoding
258
+
259
+ === Characteristics
260
+
261
+ **Compression Ratio:** ⭐⭐⭐⭐ (Better than Deflate)
262
+
263
+ **Speed:** ⭐⭐⭐⭐ (Fast)
264
+
265
+ **Memory Usage:** ⭐⭐⭐⭐ (Low to Moderate)
266
+
267
+ **Best For:**
268
+
269
+ * Large files (> 32KB)
270
+ * ZIP archives needing better compression
271
+ * When Deflate is not enough but LZMA is too slow
272
+ * Files with long-range repetition
273
+
274
+ **Not Ideal For:**
275
+
276
+ * Small files (< 32KB)
277
+ * When maximum compatibility is needed (not all ZIP readers support it)
278
+
279
+ === Usage Example
280
+
281
+ [source,ruby]
282
+ ----
283
+ deflate64 = Omnizip::AlgorithmRegistry.get(:deflate64).new(level: 7)
284
+ deflate64.compress(input, output)
285
+ ----
286
+
287
+ == Zstandard
288
+
289
+ === General
290
+
291
+ Zstandard (zstd) offers fast compression with good ratios, using a modern LZ77-based algorithm. It's designed to provide a good balance between compression ratio and speed.
292
+
293
+ **Note:** Current implementation uses the zstd-ruby gem. A pure Ruby implementation is planned for full portability.
294
+
295
+ === Characteristics
296
+
297
+ **Compression Ratio:** ⭐⭐⭐⭐ (Very Good)
298
+
299
+ **Speed:** ⭐⭐⭐⭐⭐ (Very Fast)
300
+
301
+ **Memory Usage:** ⭐⭐⭐⭐ (Low to Moderate)
302
+
303
+ **Best For:**
304
+
305
+ * Real-time compression
306
+ * Network transmission
307
+ * Fast backup operations
308
+ * When both speed and compression matter
309
+
310
+ **Not Ideal For:**
311
+
312
+ * Maximum compression needs (use LZMA instead)
313
+ * Environments requiring pure Ruby (until pure Ruby implementation is complete)
314
+
315
+ === Compression Levels
316
+
317
+ Zstandard supports levels 1-22:
318
+
319
+ * **Level 1-3:** Fast compression, ~2-3x ratio
320
+ * **Level 3:** Default, good balance
321
+ * **Level 10-15:** High compression
322
+ * **Level 16-22:** Maximum compression (very slow)
323
+
324
+ === Usage Example
325
+
326
+ [source,ruby]
327
+ ----
328
+ zstd = Omnizip::AlgorithmRegistry.get(:zstd).new(level: 3)
329
+ zstd.compress(input, output)
330
+ ----
331
+
332
+ == Algorithm Selection Guide
333
+
334
+ === By Use Case
335
+
336
+ **Maximum Compression (size is priority):**
337
+
338
+ 1. LZMA/LZMA2 (level 9)
339
+ 2. PPMd7/PPMd8 (for text)
340
+ 3. BZip2 (level 9)
341
+
342
+ **Balanced Compression (size + speed):**
343
+
344
+ 1. LZMA2 (level 5-6)
345
+ 2. BZip2 (level 5-6)
346
+ 3. Deflate64 (level 6-7)
347
+
348
+ **Fast Compression (speed is priority):**
349
+
350
+ 1. Zstandard (level 1-3)
351
+ 2. Deflate (level 1-3)
352
+ 3. LZMA2 (level 1-2)
353
+
354
+ **Text Files:**
355
+
356
+ 1. PPMd7 (best ratio)
357
+ 2. LZMA2 (good all-around)
358
+ 3. BZip2 (fast and good)
359
+
360
+ **Binary Executable Files:**
361
+
362
+ 1. LZMA2 + BCJ filter
363
+ 2. Deflate64 + BCJ filter
364
+ 3. BZip2
365
+
366
+ **Mixed Content Archives:**
367
+
368
+ 1. LZMA2 (default for .7z)
369
+ 2. Deflate (default for .zip)
370
+ 3. BZip2
371
+
372
+ === By File Type
373
+
374
+ [cols="30,70",options="header"]
375
+ |===
376
+ |File Type |Recommended Algorithm
377
+
378
+ |Text files (.txt, .log, .csv)
379
+ |PPMd7, LZMA2, BZip2
380
+
381
+ |Source code (.c, .java, .py)
382
+ |PPMd7, LZMA2
383
+
384
+ |Documents (.doc, .pdf, .odt)
385
+ |LZMA2, Deflate64
386
+
387
+ |Executables (.exe, .dll, .so)
388
+ |LZMA2 + BCJ filter
389
+
390
+ |Archives (.tar, .cpio)
391
+ |LZMA2, BZip2
392
+
393
+ |Database dumps (.sql, .db)
394
+ |PPMd7, LZMA2
395
+
396
+ |Configuration files (.xml, .json, .yaml)
397
+ |PPMd7, BZip2
398
+
399
+ |Mixed content
400
+ |LZMA2 (versatile)
401
+
402
+ |Already compressed (.jpg, .mp3, .mp4)
403
+ |Copy (no compression)
404
+ |===
405
+
406
+ == Performance Considerations
407
+
408
+ === Pure Ruby Implementation
409
+
410
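+ Except for Deflate (which uses the native Zlib library) and Zstandard (which currently uses the zstd-ruby gem), all algorithms are implemented in pure Ruby for maximum portability. For the pure Ruby implementations, this means: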
411
+
412
+ * **10-60x slower** than native C implementations
413
+ * No external dependencies required
414
+ * Works on all Ruby platforms (MRI, JRuby, TruffleRuby)
415
+ * Acceptable for most non-real-time use cases
416
+
417
+ === Performance Baseline (v1.0)
418
+
419
+ Relative to native implementations:
420
+
421
+ * **LZMA encode:** 13-15x slower (acceptable)
422
+ * **LZMA decode:** 8-10x slower (good)
423
+ * **Range coder:** 10x slower (excellent)
424
+ * **BWT (BZip2):** 50-60x slower (needs optimization)
425
+
426
+ === Memory Usage Guidelines
427
+
428
+ * **LZMA level 9:** ~16MB dictionary + overhead
429
+ * **PPMd order 8:** ~64MB+ (configurable)
430
+ * **BZip2 level 9:** 900KB blocks (~8MB working memory in the reference bzip2)
431
+ * **Deflate/Deflate64:** ~1-2MB
432
+ * **Zstandard level 3:** ~2-4MB
433
+
434
+ For low-memory environments, prefer Deflate or lower compression levels.
435
+
436
+ == See Also
437
+
438
+ * link:api-usage.adoc[Library API Usage]
439
+ * link:cli-usage.adoc[CLI Usage Guide]
440
+ * link:preprocessing-filters.adoc[Preprocessing Filters]
441
+ * link:compression-profiles.adoc[Compression Profiles]
442
+ * link:../README.adoc[Main README]
@@ -0,0 +1,247 @@
1
+ = Compression Profiles
2
+ :toc:
3
+ :toclevels: 3
4
+
5
+ == Purpose
6
+
7
+ Compression profiles provide intelligent compression strategy selection based on file type, automatically choosing optimal algorithms and settings for different content types.
8
+
9
+ == Available Profiles
10
+
11
+ === Built-in Profiles
12
+
13
+ [cols="20,20,60",options="header"]
14
+ |===
15
+ |Profile |Algorithm |Best For
16
+
17
+ |**Fast**
18
+ |Deflate (level 1)
19
+ |Quick compression, minimal CPU usage
20
+
21
+ |**Balanced**
22
+ |Deflate (level 6)
23
+ |General-purpose compression (default)
24
+
25
+ |**Maximum**
26
+ |LZMA2 (level 9)
27
+ |Maximum compression ratio; suited to larger files
28
+
29
+ |**Text**
30
+ |PPMd7
31
+ |Text files, source code, documents
32
+
33
+ |**Binary**
34
+ |LZMA2 + BCJ
35
+ |Executables, compiled binaries
36
+
37
+ |**Archive**
38
+ |Store (no compression)
39
+ |Pre-compressed archives (zip, 7z, etc.)
40
+ |===
41
+
42
+ == Using Profiles
43
+
44
+ === Explicit Profile Selection
45
+
46
+ [source,ruby]
47
+ ----
48
+ # Get a profile by name
49
+ profile = Omnizip::Profile.get(:maximum)
50
+
51
+ # Use profile for compression
52
+ Omnizip::Formats::SevenZip::Writer.new('archive.7z', profile: profile) do |zip|
53
+ zip.add_file('large_file.dat')
54
+ end
55
+ ----
56
+
57
+ === Auto-detection Based on File Type
58
+
59
+ [source,ruby]
60
+ ----
61
+ # Detect optimal profile for a file
62
+ profile = Omnizip::Profile.detect('document.txt')
63
+ puts "Selected: #{profile.name}" # => :text
64
+
65
+ # Apply detected profile
66
+ Omnizip::Formats::SevenZip::Writer.new('archive.7z', profile: profile) do |zip|
67
+ zip.add_file('document.txt')
68
+ end
69
+ ----
70
+
71
+ === Profile-based Compression
72
+
73
+ [source,ruby]
74
+ ----
75
+ # Compress with auto-detected profile
76
+ files = ['app.exe', 'readme.txt', 'data.zip']
77
+
78
+ Omnizip::Formats::SevenZip::Writer.new('backup.7z') do |zip|
79
+ files.each do |file|
80
+ # Auto-detect and apply optimal profile for each file
81
+ profile = Omnizip::Profile.detect(file)
82
+ zip.add_file(file, profile: profile)
83
+ end
84
+ end
85
+ ----
86
+
87
+ == Custom Profiles
88
+
89
+ === Creating Custom Profiles
90
+
91
+ [source,ruby]
92
+ ----
93
+ # Define a custom profile
94
+ Omnizip::Profile.define(:my_profile) do |p|
95
+ p.algorithm = :lzma2
96
+ p.level = 7
97
+ p.filter = :bcj_x86
98
+ p.solid = true
99
+ p.description = "Custom profile for my use case"
100
+ end
101
+
102
+ # Use custom profile
103
+ profile = Omnizip::Profile.get(:my_profile)
104
+ ----
105
+
106
+ === Extending Existing Profiles
107
+
108
+ [source,ruby]
109
+ ----
110
+ # Extend fast profile with slightly better compression
111
+ Omnizip::Profile.define(:my_fast, base: :fast) do |p|
112
+ p.level = 2
113
+ p.description = "Slightly better than fast"
114
+ end
115
+ ----
116
+
117
+ == Profile Auto-detection
118
+
119
+ === By MIME Type
120
+
121
+ The profile system automatically selects profiles based on file MIME types:
122
+
123
+ [source,ruby]
124
+ ----
125
+ # Text files → Text profile (PPMd7)
126
+ Omnizip::Profile.for_file_type('text/plain')
127
+
128
+ # Executables → Binary profile (LZMA2 + BCJ)
129
+ Omnizip::Profile.for_file_type('application/x-executable')
130
+
131
+ # Archives → Archive profile (Store)
132
+ Omnizip::Profile.for_file_type('application/zip')
133
+ ----
134
+
135
+ === By File Category
136
+
137
+ [source,ruby]
138
+ ----
139
+ # Select by category
140
+ Omnizip::Profile.for_file_type(:text) # → text profile
141
+ Omnizip::Profile.for_file_type(:executable) # → binary profile
142
+ Omnizip::Profile.for_file_type(:archive) # → archive profile
143
+ ----
144
+
145
+ == Profile Configuration
146
+
147
+ === Profile Attributes
148
+
149
+ Each profile has the following configurable attributes:
150
+
151
+ [source,ruby]
152
+ ----
153
+ profile = Omnizip::Profile.get(:maximum)
154
+
155
+ profile.name # => :maximum
156
+ profile.algorithm # => :lzma2
157
+ profile.level # => 9
158
+ profile.filter # => nil
159
+ profile.solid # => true
160
+ profile.description # => "Maximum compression..."
161
+ ----
162
+
163
+ === Listing Available Profiles
164
+
165
+ [source,ruby]
166
+ ----
167
+ # List all profile names
168
+ Omnizip::Profile.list
169
+ # => [:fast, :balanced, :maximum, :text, :binary, :archive]
170
+
171
+ # Get profile details
172
+ Omnizip::Profile.list.each do |name|
173
+ profile = Omnizip::Profile.get(name)
174
+ puts "#{name}: #{profile.description}"
175
+ end
176
+ ----
177
+
178
+ == Examples
179
+
180
+ === Example 1: Backup with Optimal Compression
181
+
182
+ [source,ruby]
183
+ ----
184
+ def backup_with_profiles(source_dir, archive_path)
185
+ Omnizip::Formats::SevenZip::Writer.new(archive_path) do |zip|
186
+ Dir.glob("#{source_dir}/**/*").each do |file|
187
+ next if File.directory?(file)
188
+
189
+ # Auto-detect and use optimal profile
190
+ profile = Omnizip::Profile.detect(file)
191
+ relative_path = file.sub("#{source_dir}/", '')
192
+
193
+ puts "Adding #{relative_path} with #{profile.name} profile"
194
+ zip.add_file(file, archive_path: relative_path, profile: profile)
195
+ end
196
+ end
197
+ end
198
+
199
+ backup_with_profiles('my_project/', 'backup.7z')
200
+ ----
201
+
202
+ === Example 2: Custom Profile for Log Files
203
+
204
+ [source,ruby]
205
+ ----
206
+ # Create specialized profile for log files
207
+ Omnizip::Profile.define(:logs, base: :text) do |p|
208
+ p.level = 9 # Maximum compression for logs
209
+ p.solid = true # Solid compression for similar files
210
+ p.description = "Optimized for log files"
211
+ end
212
+
213
+ # Use for log archival
214
+ Dir.glob('logs/*.log').each do |log|
215
+ Omnizip::Formats::SevenZip::Writer.new("#{log}.7z",
216
+ profile: :logs) do |zip|
217
+ zip.add_file(log)
218
+ end
219
+ end
220
+ ----
221
+
222
+ === Example 3: Mixed Content with Auto-detection
223
+
224
+ [source,ruby]
225
+ ----
226
+ # Archive with mixed file types
227
+ files = {
228
+ 'report.pdf' => :balanced, # Documents
229
+ 'app.exe' => :binary, # Executables
230
+ 'source.cpp' => :text, # Source code
231
+ 'backup.zip' => :archive # Pre-compressed
232
+ }
233
+
234
+ Omnizip::Formats::SevenZip::Writer.new('mixed.7z') do |zip|
235
+ files.each do |file, expected_profile|
236
+ profile = Omnizip::Profile.detect(file)
237
+ puts "#{file}: detected #{profile.name}, expected #{expected_profile}"
238
+ zip.add_file(file, profile: profile)
239
+ end
240
+ end
241
+ ----
242
+
243
+ == See Also
244
+
245
+ * link:../README.adoc#compression-levels[Compression Levels]
246
+ * link:advanced-features.adoc[Advanced Features]
247
+ * link:performance-profiler.adoc[Performance Profiler]