omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,194 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# Rubyzip Compatibility Demo
#
# Exercises the rubyzip-style entry points loaded from
# lib/omnizip/rubyzip_compat (Zip::File, Zip::OutputStream and
# Zip::InputStream) against archives written to a throwaway directory,
# printing the outcome of every step.
#

require_relative "../lib/omnizip/rubyzip_compat"
require "tempfile"
require "fileutils"

# Prints a section title followed by a 40-character dashed underline.
announce = lambda do |title|
  puts title
  puts "-" * 40
end

banner_rule = "=" * 80

puts banner_rule
puts "Omnizip Rubyzip Compatibility Demo"
puts banner_rule
puts

# Scratch directory holding every archive created below; removed in `ensure`.
temp_dir = Dir.mktmpdir("omnizip_demo")
puts "Working directory: #{temp_dir}"
puts

begin
  # Demo 1: create an archive through Zip::File, then list it back.
  announce.call("Demo 1: Basic File Operations")

  zip_path = File.join(temp_dir, "demo.zip")

  # A nil body marks an entry that is added without a content block.
  demo_entries = {
    "readme.txt" => "Hello from Omnizip!",
    "data.txt" => "Sample data\nLine 2\nLine 3",
    "dir/" => nil,
    "dir/nested.txt" => "Nested file content",
  }

  Zip::File.open(zip_path, create: true) do |zip|
    demo_entries.each do |entry_name, body|
      if body
        zip.add(entry_name) { body }
      else
        zip.add(entry_name)
      end
    end
  end

  puts "✓ Created archive with 4 entries"

  Zip::File.open(zip_path) do |zip|
    puts "✓ Archive contains #{zip.size} entries:"
    zip.each { |entry| puts " - #{entry.name} (#{entry.size} bytes)" }
  end
  puts

  # Demo 2: write three entries sequentially through Zip::OutputStream.
  announce.call("Demo 2: Streaming Write")

  stream_path = File.join(temp_dir, "stream.zip")
  streamed_files = {
    "file1.txt" => "Content 1",
    "file2.txt" => "Content 2",
    "file3.txt" => "Content 3",
  }

  Zip::OutputStream.open(stream_path) do |zos|
    streamed_files.each do |entry_name, body|
      zos.put_next_entry(entry_name)
      zos.write(body)
    end
  end

  puts "✓ Created streaming archive with 3 entries"
  puts

  # Demo 3: read the streamed archive back entry by entry.
  announce.call("Demo 3: Streaming Read")

  Zip::InputStream.open(stream_path) do |zis|
    entries_read = 0
    loop do
      entry = zis.get_next_entry
      break unless entry

      content = zis.read
      puts "✓ Read #{entry.name}: #{content.inspect}"
      entries_read += 1
    end
    puts "✓ Total entries read: #{entries_read}"
  end
  puts

  # Demo 4: print the metadata exposed by the first entry.
  announce.call("Demo 4: Entry Info and Metadata")

  Zip::InputStream.open(stream_path) do |zis|
    first_entry = zis.get_next_entry
    puts "✓ Entry metadata:"
    puts " - Name: #{first_entry.name}"
    puts " - Size: #{first_entry.size} bytes"
    puts " - Compressed: #{first_entry.compressed_size} bytes"
    puts " - Time: #{first_entry.time}"
    puts " - Directory: #{first_entry.directory?}"
    puts " - File: #{first_entry.file?}"
    puts " - Compression: #{first_entry.compression_method}"
  end
  puts

  # Demo 5: write five generated entries, then count them on read-back.
  announce.call("Demo 5: Batch Operations")

  batch_path = File.join(temp_dir, "batch.zip")

  Zip::OutputStream.open(batch_path) do |zos|
    (0...5).each do |index|
      zos.put_next_entry("batch_#{index}.txt")
      zos.write("Batch content #{index}")
    end
  end

  batch_total = 0
  Zip::InputStream.open(batch_path) do |zis|
    batch_total += 1 while zis.get_next_entry
  end
  puts "✓ Created and read #{batch_total} batch entries"
  puts

  # Demo 6: mix directory entries with files and label each on read-back.
  announce.call("Demo 6: Directory Entries")

  dir_path = File.join(temp_dir, "with_dirs.zip")

  Zip::OutputStream.open(dir_path) do |zos|
    zos.put_next_entry("folder/")
    zos.put_next_entry("folder/subfolder/")
    zos.put_next_entry("folder/file.txt")
    zos.write("In folder")
    zos.put_next_entry("folder/subfolder/deep.txt")
    zos.write("Deep file")
  end

  Zip::InputStream.open(dir_path) do |zis|
    loop do
      entry = zis.get_next_entry
      break unless entry

      label = if entry.directory?
                "[DIR]"
              else
                "[FILE]"
              end
      puts "✓ #{label} #{entry.name}"
    end
  end
  puts

  # Demo 7: write the same payload with each compression method and
  # report the integer percentage saved.
  announce.call("Demo 7: Compression Methods")

  test_data = "Test data " * 100

  %i[store deflate].each do |codec|
    comp_path = File.join(temp_dir, "#{codec}.zip")

    Zip::OutputStream.open(comp_path) do |zos|
      zos.put_next_entry("test.txt", compression: codec)
      zos.write(test_data)
    end

    Zip::InputStream.open(comp_path) do |zis|
      entry = zis.get_next_entry
      # Guard the division: a zero-byte entry reports 0% savings.
      ratio = if entry.size.positive?
                100 - (entry.compressed_size * 100 / entry.size)
              else
                0
              end
      puts "✓ #{codec.to_s.capitalize}: #{entry.compressed_size}/#{entry.size} bytes (#{ratio}% savings)"
    end
  end
  puts

  # Summary
  puts banner_rule
  puts "All demos completed successfully!"
  puts banner_rule
  puts
  puts "Key Features Demonstrated:"
  [
    " ✓ Zip::OutputStream (streaming write with put_next_entry)",
    " ✓ Zip::InputStream (streaming read with get_next_entry)",
    " ✓ Zip::Entry metadata (name, size, time, compression, etc.)",
    " ✓ Directory entries",
    " ✓ Batch operations",
    " ✓ Multiple compression methods (Store, Deflate)",
    " ✓ Content reading from streams",
  ].each { |feature| puts feature }
  puts
  puts "Rubyzip API Compatibility: Streaming API ✓"
  puts "Note: File-based API (Zip::File) works for creation and basic reads."
  puts " Full round-trip with Zip::File will be completed in v1.2."
  puts
rescue StandardError => error
  # Report the failure with a short backtrace and signal it via exit status.
  puts "❌ Error: #{error.message}"
  puts error.backtrace.first(5)
  exit 1
ensure
  # Cleanup
  FileUtils.rm_rf(temp_dir) if temp_dir
end
data/exe/omnizip ADDED
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # Copyright (C) 2024 Ribose Inc.
6
+ #
7
+ # This file is part of Omnizip.
8
+ #
9
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
10
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2.1 of the License, or (at your option) any later version.
16
+ #
17
+ # See the COPYING file for the complete text of the license.
18
+ #
19
+
20
+ require_relative "../lib/omnizip"
21
+
22
# Hand the command line to the CLI. Any StandardError is reported as a
# one-line message on stderr and turned into exit status 1.
begin
  Omnizip::Cli.start(ARGV)
rescue StandardError => error
  warn("Error: #{error.message}")
  exit(1)
end
@@ -0,0 +1,130 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "algorithm_registry"
20
+
21
+ module Omnizip
22
+ # Abstract base class for compression algorithms.
23
+ #
24
+ # All compression algorithms should inherit from this class and implement
25
+ # the required methods. Algorithms are automatically registered with the
26
+ # AlgorithmRegistry when defined.
27
+ class Algorithm
28
+ attr_reader :options, :filter
29
+
30
+ # Initialize algorithm with options.
31
+ #
32
+ # @param options [Hash] Algorithm-specific options
33
+ def initialize(options = {})
34
+ @options = options
35
+ @filter = nil
36
+ end
37
+
38
+ # Set a preprocessing filter for this algorithm.
39
+ #
40
+ # The filter will be applied before compression and reversed after
41
+ # decompression. Returns self for method chaining.
42
+ #
43
+ # @param filter [Filters::FilterBase, FilterPipeline] Filter or
44
+ # pipeline to use
45
+ # @return [self] For method chaining
46
+ def with_filter(filter)
47
+ @filter = filter
48
+ self
49
+ end
50
+
51
+ # Compress data from input to output.
52
+ #
53
+ # If a filter is set, data is filtered before compression.
54
+ #
55
+ # @param input [IO, String, #read] Input source
56
+ # @param output [IO, #write] Output destination
57
+ # @raise [NotImplementedError] Must be implemented by subclass
58
+ # @return [void]
59
+ def compress(input, output)
60
+ raise NotImplementedError,
61
+ "#{self.class} must implement #compress"
62
+ end
63
+
64
+ # Decompress data from input to output.
65
+ #
66
+ # If a filter is set, data is unfiltered after decompression.
67
+ #
68
+ # @param input [IO, String, #read] Input source
69
+ # @param output [IO, #write] Output destination
70
+ # @raise [NotImplementedError] Must be implemented by subclass
71
+ # @return [void]
72
+ def decompress(input, output)
73
+ raise NotImplementedError,
74
+ "#{self.class} must implement #decompress"
75
+ end
76
+
77
+ class << self
78
+ # Get metadata about this algorithm.
79
+ #
80
+ # @raise [NotImplementedError] Must be implemented by subclass
81
+ # @return [Models::AlgorithmMetadata] Algorithm metadata
82
+ def metadata
83
+ raise NotImplementedError,
84
+ "#{self} must implement .metadata"
85
+ end
86
+
87
+ # Automatically register algorithm when inherited.
88
+ #
89
+ # This hook is called whenever a class inherits from Algorithm,
90
+ # automatically registering it with the AlgorithmRegistry.
91
+ #
92
+ # @param subclass [Class] The inheriting class
93
+ # @return [void]
94
+ def inherited(subclass)
95
+ super
96
+ # Register algorithm when metadata is defined
97
+ subclass.define_singleton_method(:register_algorithm) do
98
+ meta = subclass.metadata
99
+ AlgorithmRegistry.register(meta.name.to_sym, subclass)
100
+ rescue NotImplementedError
101
+ # Metadata not yet defined, will be registered manually
102
+ end
103
+ end
104
+ end
105
+
106
+ protected
107
+
108
+ # Apply filter to data if filter is set.
109
+ #
110
+ # @param data [String] Data to filter
111
+ # @param position [Integer] Stream position
112
+ # @return [String] Filtered data
113
+ def apply_filter(data, position = 0)
114
+ return data unless @filter
115
+
116
+ @filter.encode(data, position)
117
+ end
118
+
119
+ # Reverse filter on data if filter is set.
120
+ #
121
+ # @param data [String] Data to unfilter
122
+ # @param position [Integer] Stream position
123
+ # @return [String] Unfiltered data
124
+ def reverse_filter(data, position = 0)
125
+ return data unless @filter
126
+
127
+ @filter.decode(data, position)
128
+ end
129
+ end
130
+ end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
module Omnizip
  # Central lookup table mapping algorithm names to algorithm classes.
  #
  # Algorithm classes are added with .register and fetched by name with
  # .get. Names are normalised to symbols, so strings and symbols are
  # interchangeable. All state lives in a class-level hash.
  class AlgorithmRegistry
    # Add (or replace) the class stored under +name+.
    #
    # @param name [Symbol, String] The name identifier for the algorithm
    # @param klass [Class] The algorithm class to register
    # @raise [ArgumentError] If name or klass is nil
    # @return [Class] The class that was stored
    def self.register(name, klass)
      raise ArgumentError, "Algorithm name cannot be nil" if name.nil?
      raise ArgumentError, "Algorithm class cannot be nil" if klass.nil?

      key = name.to_sym
      algorithms[key] = klass
    end

    # Look up the class registered under +name+.
    #
    # @param name [Symbol, String] The name identifier for the algorithm
    # @raise [UnknownAlgorithmError] If algorithm is not registered
    # @return [Class] The registered algorithm class
    def self.get(name)
      found = algorithms[name.to_sym]
      unless found
        raise UnknownAlgorithmError,
              "Unknown algorithm: #{name}. " \
              "Available: #{available.join(', ')}"
      end

      found
    end

    # Tell whether something is registered under +name+.
    #
    # @param name [Symbol, String] The name identifier for the algorithm
    # @return [Boolean] True if algorithm is registered, false otherwise
    def self.registered?(name)
      key = name.to_sym
      algorithms.key?(key)
    end

    # List every registered name.
    #
    # @return [Array<Symbol>] Array of registered algorithm names
    def self.available
      algorithms.keys
    end

    # Empty the registry (primarily for testing).
    #
    # @return [void]
    def self.reset!
      algorithms.clear
    end

    # Backing hash, created lazily on first access.
    #
    # @return [Hash] The algorithms registry
    def self.algorithms
      @algorithms ||= {}
    end
    private_class_method :algorithms
  end
end
File without changes
@@ -0,0 +1,225 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ module Omnizip
24
+ module Algorithms
25
+ class BZip2 < Algorithm
26
# Burrows-Wheeler Transform (BWT)
#
# The BWT is a reversible block-sorting transform that rearranges a
# byte string into runs of similar bytes. It is the first stage of the
# BZip2 pipeline.
#
# Conceptually the transform:
# 1. Considers every rotation of the input
# 2. Sorts the rotations lexicographically (by byte value)
# 3. Emits the last byte of each sorted rotation
# 4. Records the sorted row that holds the original input
#
# All sizes and indices in this class are byte-based, so the result does
# not depend on the encoding tag of the string passed in.
class Bwt
  # Encode data using the Burrows-Wheeler Transform.
  #
  # Rotations are sorted by start offset and compared byte by byte, so
  # no rotation string is ever materialised.
  #
  # @param data [String] Input data to transform
  # @return [Array<String, Integer>] Transformed data (binary string of
  #   the same byte size) and the primary index
  def encode(data)
    return ["".b, 0] if data.empty?

    bytes = data.bytes
    # Byte count, not String#length: the two differ for multibyte text.
    n = bytes.length

    rotation_order = (0...n).to_a
    rotation_order.sort! { |a, b| compare_rotations(bytes, a, b, n) }

    # Row of the sorted matrix that holds the unrotated input.
    primary_index = rotation_order.index(0)

    # Last column: the byte that precedes the start of each rotation.
    transformed = rotation_order.map { |start| bytes[(start - 1) % n] }
                                .pack("C*").b

    [transformed, primary_index]
  end

  # Decode data using the reverse Burrows-Wheeler Transform.
  #
  # @param data [String] Transformed data (last column)
  # @param primary_index [Integer] Row of the original string, as
  #   returned by #encode
  # @return [String] Original data as a binary string
  def decode(data, primary_index)
    return "".b if data.empty?

    lf = build_lf_mapping(data)

    # The first column is the last column sorted. Sort it once up front
    # instead of once per output byte.
    first_column = data.bytes.sort

    idx = primary_index
    result = Array.new(first_column.length) do
      byte = first_column[idx]
      # Step to the row whose rotation starts one byte later.
      idx = lf[idx]
      byte
    end

    result.pack("C*").b
  end

  # Build the mapping #decode walks to rebuild the input.
  #
  # For each row +j+ of the first (sorted) column, the returned array
  # holds the index in the last column of the same byte occurrence
  # (k-th occurrence of a byte in the first column pairs with its k-th
  # occurrence in the last column).
  #
  # @param last_column [String] Last column (transformed data)
  # @return [Array<Integer>] Mapping array, one slot per byte
  def build_lf_mapping(last_column)
    start_of = byte_start_offsets(last_column)
    seen = Array.new(256, 0) # occurrences of each byte value so far
    lf = Array.new(last_column.bytesize)

    last_column.each_byte.with_index do |byte, i|
      lf[start_of[byte] + seen[byte]] = i
      seen[byte] += 1
    end

    lf
  end

  private

  # Compare two rotations of +bytes+ without creating strings.
  #
  # Rotations are identified by their start offsets and compared over
  # one full cycle of +n+ bytes.
  #
  # @param bytes [Array<Integer>] Byte array
  # @param a [Integer] First rotation start index
  # @param b [Integer] Second rotation start index
  # @param n [Integer] Length of +bytes+
  # @return [Integer] -1, 0, or 1 for comparison result
  def compare_rotations(bytes, a, b, n)
    return 0 if a == b

    n.times do |offset|
      cmp = bytes[(a + offset) % n] <=> bytes[(b + offset) % n]
      return cmp unless cmp.zero?
    end

    0
  end

  # Offset at which each byte value starts in the sorted column.
  #
  # @param column [String] Column to count
  # @return [Array<Integer>] 256 offsets; entry +v+ is the number of
  #   bytes in +column+ smaller than +v+
  def byte_start_offsets(column)
    counts = Array.new(256, 0)
    column.each_byte { |b| counts[b] += 1 }

    running = 0
    counts.map do |count|
      start = running
      running += count
      start
    end
  end

  # Build the "next" array: for each position in the last column, the
  # row of the first column where that same byte occurrence sits.
  #
  # Not called by #encode or #decode in this class.
  #
  # @param last_column [String] Last column (transformed data)
  # @return [Array<Integer>] Next array, one slot per byte
  def build_next_array(last_column)
    next_slot = byte_start_offsets(last_column)

    last_column.each_byte.map do |byte|
      slot = next_slot[byte]
      next_slot[byte] += 1
      slot
    end
  end

  # Collect bytes of +first_column+ by repeatedly following +next_array+
  # from +primary_index+, one step per byte of the column.
  #
  # Not called by #encode or #decode in this class.
  #
  # @param first_column [String] First column (sorted)
  # @param next_array [Array<Integer>] Next positions
  # @param primary_index [Integer] Starting position
  # @return [String] Collected bytes as a binary string
  def reconstruct_from_next(first_column, next_array, primary_index)
    idx = primary_index

    collected = Array.new(first_column.bytesize) do
      byte = first_column.getbyte(idx)
      idx = next_array[idx]
      byte
    end

    collected.pack("C*").b
  end
end
223
+ end
224
+ end
225
+ end