omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
@@ -0,0 +1,260 @@
1
+ # Omnizip Benchmark Suite
2
+
3
+ Comprehensive benchmark suite comparing omnizip performance against native
4
+ 7-Zip.
5
+
6
+ ## Purpose
7
+
8
+ This benchmark suite provides:
9
+
10
+ * Performance comparison between omnizip (Ruby) and 7-Zip (C)
11
+ * Compression ratio analysis for each algorithm
12
+ * Filter effectiveness measurements
13
+ * Baseline for future optimization work
14
+
15
+ ## Requirements
16
+
17
+ * Ruby 2.7 or higher
18
+ * 7-Zip CLI tool (`7z` or `7za`) installed (optional but recommended)
19
+ * Omnizip gem dependencies installed (`bundle install`)
20
+
21
+ ## Installation
22
+
23
+ Install 7-Zip for comparisons:
24
+
25
+ ```bash
26
+ # macOS
27
+ brew install p7zip
28
+
29
+ # Ubuntu/Debian
30
+ sudo apt-get install p7zip-full
31
+
32
+ # Windows
33
+ # Download from https://www.7-zip.org/
34
+ ```
35
+
36
+ ## Running Benchmarks
37
+
38
+ ### Quick Start
39
+
40
+ Run all benchmarks:
41
+
42
+ ```bash
43
+ ruby benchmark/run_benchmarks.rb
44
+ ```
45
+
46
+ Run quick benchmarks (LZMA only, smallest size, text data; filter benchmarks are skipped):
47
+
48
+ ```bash
49
+ ruby benchmark/run_benchmarks.rb --quick
50
+ ```
51
+
52
+ ### Options
53
+
54
+ * `-v`, `--verbose` - Enable verbose output
55
+ * `-q`, `--quick` - Run quick benchmarks (faster, less coverage)
56
+ * `--compression-only` - Run only compression algorithm benchmarks
57
+ * `--filters-only` - Run only filter benchmarks
58
+ * `--output=FILE` - Save results to JSON file
59
+ * `-h`, `--help` - Show help message
60
+
61
+ ### Examples
62
+
63
+ ```bash
64
+ # Verbose output with JSON results
65
+ ruby benchmark/run_benchmarks.rb --verbose --output=results.json
66
+
67
+ # Quick compression-only benchmark
68
+ ruby benchmark/run_benchmarks.rb --quick --compression-only
69
+
70
+ # Full benchmark with results saved
71
+ ruby benchmark/run_benchmarks.rb --output=benchmark/results/full.json
72
+ ```
73
+
74
+ ## What Gets Benchmarked
75
+
76
+ ### Compression Algorithms
77
+
78
+ * **LZMA** - Lempel-Ziv-Markov chain algorithm
79
+ * **LZMA2** - Improved LZMA with better multithreading
80
+ * **PPMd7** - Prediction by partial matching
81
+ * **BZip2** - Burrows-Wheeler transform compression
82
+
83
+ ### Filters
84
+
85
+ * **BCJ x86** - Branch/Call/Jump filter for x86 executables
86
+ * **Delta** - Delta encoding for gradually changing data (e.g. audio samples or bitmap pixels)
87
+
88
+ ### Data Types
89
+
90
+ * **Text** - Lorem ipsum text data
91
+ * **Source Code** - Ruby source code
92
+ * **Repetitive** - Highly compressible repetitive patterns
93
+ * **Random** - Incompressible random data
94
+
95
+ ### Test Sizes
96
+
97
+ * 1KB (1,024 bytes)
98
+ * 10KB (10,240 bytes)
99
+ * 100KB (102,400 bytes)
100
+
101
+ ## Interpreting Results
102
+
103
+ ### Compression Ratio
104
+
105
+ ```
106
+ Compression Ratio = Original Size / Compressed Size
107
+ ```
108
+
109
+ Higher is better. Example: 3.0x means the data was compressed to 1/3 of its original size.
110
+
111
+ ### Size Difference
112
+
113
+ Shows how much larger/smaller omnizip output is compared to 7-Zip:
114
+
115
+ * Positive % = omnizip produces larger files
116
+ * Negative % = omnizip produces smaller files (better)
117
+
118
+ Expect omnizip to be within 10-20% of 7-Zip size.
119
+
120
+ ### Speed Ratio
121
+
122
+ ```
123
+ Speed Ratio = Omnizip Time / 7-Zip Time
124
+ ```
125
+
126
+ Shows how many times slower omnizip is compared to 7-Zip.
127
+
128
+ * Expected: 5-20x slower (Ruby vs C is normal)
129
+ * < 10x = Excellent performance for a Ruby implementation
130
+ * > 20x = May need optimization
131
+
132
+ ## Expected Performance Characteristics
133
+
134
+ ### Compression Ratios
135
+
136
+ Omnizip should achieve similar compression ratios to 7-Zip (within 10-20%)
137
+ because both implement the same algorithms. Differences come from:
138
+
139
+ * Parameter tuning differences
140
+ * Implementation details
141
+ * Ruby vs C precision differences
142
+
143
+ ### Speed
144
+
145
+ Ruby implementations are typically 5-20x slower than C implementations:
146
+
147
+ * **5-10x slower** = Excellent for Ruby
148
+ * **10-15x slower** = Good for Ruby
149
+ * **15-20x slower** = Acceptable for Ruby
150
+ * **> 20x slower** = May indicate optimization opportunities
151
+
152
+ ### Algorithm-Specific Notes
153
+
154
+ * **LZMA/LZMA2**: Most complex, expect larger speed differences
155
+ * **BZip2**: Simpler algorithm, may have better speed ratios
156
+ * **PPMd7**: Memory-intensive, speed depends on implementation details
157
+
158
+ ## Architecture
159
+
160
+ The benchmark suite follows an object-oriented architecture:
161
+
162
+ ```
163
+ benchmark/
164
+ ├── models/ # Data models
165
+ │ ├── benchmark_result.rb # Single benchmark result
166
+ │ └── comparison_result.rb # Omnizip vs 7-Zip comparison
167
+ ├── test_data.rb # Test data generator
168
+ ├── compression_bench.rb # Algorithm benchmarks
169
+ ├── filter_bench.rb # Filter benchmarks
170
+ ├── benchmark_suite.rb # Main orchestrator
171
+ ├── reporter.rb # Results formatting
172
+ └── run_benchmarks.rb # Executable runner
173
+ ```
174
+
175
+ ## Output Format
176
+
177
+ ### Console Output
178
+
179
+ ```
180
+ ================================================================================
181
+ OMNIZIP vs 7-ZIP BENCHMARK RESULTS
182
+ ================================================================================
183
+
184
+ --------------------------------------------------------------------------------
185
+ Test: lzma_text
186
+ --------------------------------------------------------------------------------
187
+ Metric Omnizip 7-Zip
188
+ --------------------------------------------------------------------------------
189
+ Input Size 10.0KB 10.0KB
190
+ Compressed Size 3.5KB 3.2KB
191
+ Compression Ratio 2.86 3.13
192
+ Compression Time 2.500s 0.150s
193
+
194
+ --------------------------------------------------------------------------------
195
+ Comparison:
196
+ --------------------------------------------------------------------------------
197
+ Size difference: +300 bytes (+9.4%)
198
+ Speed ratio: 16.7x slower
199
+ ```
200
+
201
+ ### JSON Output
202
+
203
+ ```json
204
+ {
205
+ "timestamp": "2025-10-26T12:00:00Z",
206
+ "results": [
207
+ {
208
+ "test_name": "lzma_text",
209
+ "omnizip": {
210
+ "algorithm": "lzma",
211
+ "input_size": 10240,
212
+ "compressed_size": 3584,
213
+ "compression_ratio": 2.86,
214
+ "compression_time": 2.5
215
+ },
216
+ "seven_zip": { ... },
217
+ "comparison": {
218
+ "size_difference_bytes": 300,
219
+ "size_difference_percentage": 9.4,
220
+ "compression_speed_ratio": 16.7
221
+ }
222
+ }
223
+ ]
224
+ }
225
+ ```
226
+
227
+ ## Troubleshooting
228
+
229
+ ### 7-Zip Not Found
230
+
231
+ If 7-Zip is not installed, benchmarks will still run but comparisons will show
232
+ "7-Zip not available". Install 7-Zip for full comparisons.
233
+
234
+ ### Slow Benchmarks
235
+
236
+ Use `--quick` flag for faster results with less coverage, or run specific
237
+ benchmark types with `--compression-only` or `--filters-only`.
238
+
239
+ ### Memory Issues
240
+
241
+ Large test files (100KB+) with complex algorithms may use significant memory.
242
+ Reduce test sizes in `benchmark_suite.rb` if needed.
243
+
244
+ ## Future Enhancements
245
+
246
+ * Add decompression benchmarks
247
+ * Test larger file sizes (1MB, 10MB)
248
+ * Add multi-threaded benchmarks
249
+ * Compare memory usage
250
+ * Add visualization/charts
251
+
252
+ ## Contributing
253
+
254
+ When adding new benchmarks:
255
+
256
+ 1. Follow object-oriented design patterns
257
+ 2. Use model classes for data representation
258
+ 3. Maintain separation of concerns
259
+ 4. Add documentation for new features
260
+ 5. Run `bundle exec rubocop` before committing
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "test_data"
4
+ require_relative "compression_bench"
5
+ require_relative "filter_bench"
6
+ require_relative "reporter"
7
+
8
+ module Benchmark
9
+ # Main orchestrator for running all benchmarks
10
+ class BenchmarkSuite
11
+ TEST_SIZES = [1024, 10_240, 102_400].freeze # 1KB, 10KB, 100KB
12
+ DATA_TYPES = %w[text source_code repetitive random].freeze
13
+
14
+ attr_reader :verbose, :results
15
+
16
+ def initialize(verbose: false, quick: false)
17
+ @verbose = verbose
18
+ @quick = quick
19
+ @test_data = TestData.new
20
+ @compression_bench = CompressionBench.new(verbose: verbose)
21
+ @filter_bench = FilterBench.new(verbose: verbose)
22
+ @results = []
23
+ end
24
+
25
+ def run_all
26
+ puts "Starting Omnizip vs 7-Zip benchmark suite..."
27
+ puts "7-Zip available: #{@compression_bench.seven_zip_available?}"
28
+
29
+ generate_test_data
30
+ run_compression_benchmarks
31
+ run_filter_benchmarks
32
+ cleanup_test_data
33
+
34
+ self
35
+ end
36
+
37
+ def run_compression_only
38
+ puts "Running compression benchmarks only..."
39
+ generate_test_data
40
+ run_compression_benchmarks
41
+ cleanup_test_data
42
+ self
43
+ end
44
+
45
+ def run_filters_only
46
+ puts "Running filter benchmarks only..."
47
+ generate_test_data
48
+ run_filter_benchmarks
49
+ cleanup_test_data
50
+ self
51
+ end
52
+
53
+ def report
54
+ Reporter.new(@results).print_summary
55
+ end
56
+
57
+ def save_results(filename)
58
+ Reporter.new(@results).save_to_file(filename)
59
+ end
60
+
61
+ private
62
+
63
+ def generate_test_data
64
+ puts "\nGenerating test data..." if verbose
65
+
66
+ sizes = @quick ? [TEST_SIZES.first] : TEST_SIZES
67
+ types = @quick ? [DATA_TYPES.first] : DATA_TYPES
68
+
69
+ sizes.each do |size|
70
+ types.each do |type|
71
+ @test_data.public_send("generate_#{type}", size,
72
+ filename: "#{type}_#{size}.dat")
73
+ end
74
+ end
75
+ end
76
+
77
+ def run_compression_benchmarks
78
+ puts "\nRunning compression benchmarks..." if verbose
79
+
80
+ sizes = @quick ? [TEST_SIZES.first] : TEST_SIZES
81
+ types = @quick ? [DATA_TYPES.first] : DATA_TYPES
82
+ algos = @quick ? ["lzma"] : CompressionBench::ALGORITHMS
83
+
84
+ algos.each do |algorithm|
85
+ sizes.each do |size|
86
+ types.each do |type|
87
+ filename = "#{type}_#{size}.dat"
88
+ filepath = File.join(@test_data.data_dir, filename)
89
+
90
+ result = @compression_bench.benchmark_algorithm(
91
+ algorithm, filepath, type
92
+ )
93
+ @results << result
94
+ end
95
+ end
96
+ end
97
+ end
98
+
99
+ def run_filter_benchmarks
100
+ puts "\nRunning filter benchmarks..." if verbose
101
+
102
+ return if @quick
103
+
104
+ sizes = [TEST_SIZES[1]]
105
+ types = %w[source_code]
106
+
107
+ FilterBench::FILTERS.each do |filter|
108
+ sizes.each do |size|
109
+ types.each do |type|
110
+ filename = "#{type}_#{size}.dat"
111
+ filepath = File.join(@test_data.data_dir, filename)
112
+
113
+ result = @filter_bench.benchmark_filter(filter, filepath, type)
114
+ @results << result
115
+ end
116
+ end
117
+ end
118
+ end
119
+
120
+ def cleanup_test_data
121
+ puts "\nCleaning up test data..." if verbose
122
+ @test_data.cleanup
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "benchmark"
4
+ require "tempfile"
5
+ require "fileutils"
6
+ require_relative "models/benchmark_result"
7
+ require_relative "models/comparison_result"
8
+
9
+ module Benchmark
10
+ # Benchmarks compression algorithms against native 7-Zip
11
+ class CompressionBench
12
+ ALGORITHMS = %w[lzma lzma2 ppmd7 bzip2].freeze
13
+ ITERATIONS = 3
14
+
15
+ attr_reader :verbose
16
+
17
+ def initialize(verbose: false)
18
+ @verbose = verbose
19
+ @seven_zip_available = check_seven_zip_availability
20
+ end
21
+
22
+ def seven_zip_available?
23
+ @seven_zip_available
24
+ end
25
+
26
+ def benchmark_algorithm(algorithm, input_file, input_type)
27
+ puts "Benchmarking #{algorithm} on #{input_type}..." if verbose
28
+
29
+ omnizip_result = benchmark_omnizip(algorithm, input_file, input_type)
30
+ seven_zip_result = if seven_zip_available?
31
+ benchmark_seven_zip(algorithm, input_file,
32
+ input_type)
33
+ else
34
+ create_unavailable_result(algorithm, input_file,
35
+ input_type)
36
+ end
37
+
38
+ Models::ComparisonResult.new(
39
+ test_name: "#{algorithm}_#{input_type}",
40
+ omnizip_result: omnizip_result,
41
+ seven_zip_result: seven_zip_result,
42
+ )
43
+ end
44
+
45
+ private
46
+
47
+ def check_seven_zip_availability
48
+ system("which 7z > /dev/null 2>&1") ||
49
+ system("which 7za > /dev/null 2>&1")
50
+ end
51
+
52
+ def get_7z_command
53
+ @get_7z_command ||= if system("which 7z > /dev/null 2>&1")
54
+ "7z"
55
+ elsif system("which 7za > /dev/null 2>&1")
56
+ "7za"
57
+ end
58
+ end
59
+
60
+ def benchmark_omnizip(algorithm, input_file, input_type)
61
+ input_size = File.size(input_file)
62
+ compressed_file = create_temp_file(".7z")
63
+
64
+ begin
65
+ time = measure_time do
66
+ compress_with_omnizip(algorithm, input_file, compressed_file)
67
+ end
68
+
69
+ compressed_size = if File.exist?(compressed_file)
70
+ File.size(compressed_file)
71
+ end
72
+
73
+ Models::BenchmarkResult.new(
74
+ algorithm: algorithm,
75
+ input_size: input_size,
76
+ input_type: input_type,
77
+ compressed_size: compressed_size,
78
+ compression_time: time,
79
+ )
80
+ rescue StandardError => e
81
+ Models::BenchmarkResult.new(
82
+ algorithm: algorithm,
83
+ input_size: input_size,
84
+ input_type: input_type,
85
+ error: e.message,
86
+ )
87
+ ensure
88
+ FileUtils.rm_f(compressed_file)
89
+ end
90
+ end
91
+
92
+ def benchmark_seven_zip(algorithm, input_file, input_type)
93
+ input_size = File.size(input_file)
94
+ compressed_file = create_temp_file(".7z")
95
+
96
+ begin
97
+ time = measure_time do
98
+ compress_with_7z(algorithm, input_file, compressed_file)
99
+ end
100
+
101
+ compressed_size = if File.exist?(compressed_file)
102
+ File.size(compressed_file)
103
+ end
104
+
105
+ Models::BenchmarkResult.new(
106
+ algorithm: algorithm,
107
+ input_size: input_size,
108
+ input_type: input_type,
109
+ compressed_size: compressed_size,
110
+ compression_time: time,
111
+ )
112
+ rescue StandardError => e
113
+ Models::BenchmarkResult.new(
114
+ algorithm: algorithm,
115
+ input_size: input_size,
116
+ input_type: input_type,
117
+ error: e.message,
118
+ )
119
+ ensure
120
+ FileUtils.rm_f(compressed_file)
121
+ end
122
+ end
123
+
124
+ def create_unavailable_result(algorithm, input_file, input_type)
125
+ Models::BenchmarkResult.new(
126
+ algorithm: algorithm,
127
+ input_size: File.size(input_file),
128
+ input_type: input_type,
129
+ error: "7-Zip not available",
130
+ )
131
+ end
132
+
133
+ def compress_with_omnizip(algorithm, input_file, output_file)
134
+ require_relative "../lib/omnizip"
135
+
136
+ algo_class = case algorithm
137
+ when "lzma" then Omnizip::Algorithms::LZMA
138
+ when "lzma2" then Omnizip::Algorithms::LZMA2
139
+ when "ppmd7" then Omnizip::Algorithms::PPMd7
140
+ when "bzip2" then Omnizip::Algorithms::BZip2
141
+ else raise "Unknown algorithm: #{algorithm}"
142
+ end
143
+
144
+ input = File.binread(input_file)
145
+ compressed = algo_class.compress(input)
146
+ File.binwrite(output_file, compressed)
147
+ end
148
+
149
+ def compress_with_7z(algorithm, input_file, output_file)
150
+ method_flag = case algorithm
151
+ when "lzma" then "LZMA"
152
+ when "lzma2" then "LZMA2"
153
+ when "ppmd7" then "PPMd"
154
+ when "bzip2" then "BZip2"
155
+ else raise "Unknown algorithm: #{algorithm}"
156
+ end
157
+
158
+ cmd = "#{get_7z_command} a -m0=#{method_flag} -mx=5 #{output_file} " \
159
+ "#{input_file} > /dev/null 2>&1"
160
+ success = system(cmd)
161
+ raise "7z compression failed" unless success
162
+ end
163
+
164
+ def measure_time(&block)
165
+ times = []
166
+ ITERATIONS.times do
167
+ time = ::Benchmark.realtime(&block)
168
+ times << time
169
+ end
170
+ times.sum / times.size
171
+ end
172
+
173
+ def create_temp_file(extension)
174
+ temp = Tempfile.new(["benchmark", extension])
175
+ path = temp.path
176
+ temp.close
177
+ temp.unlink
178
+ path
179
+ end
180
+ end
181
+ end