omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,193 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "bwt"
24
+ require_relative "mtf"
25
+ require_relative "rle"
26
+ require_relative "huffman"
27
+ require_relative "../../checksums/crc32"
28
+
29
+ module Omnizip
30
+ module Algorithms
31
+ class BZip2 < Algorithm
32
+ # BZip2 Decoder
33
+ #
34
+ # Reverses the full BZip2 compression pipeline:
35
+ # 1. Read block headers
36
+ # 2. Decode Huffman coding
37
+ # 3. Reverse Run-Length Encoding (RLE)
38
+ # 4. Reverse Move-to-Front Transform (MTF)
39
+ # 5. Reverse Burrows-Wheeler Transform (BWT)
40
+ # 6. Verify CRC32 checksum
41
+ #
42
+ # Processes each block independently and concatenates results.
43
+ class Decoder
44
+ attr_reader :input
45
+
46
+ # Initialize decoder
47
+ #
48
+ # @param input [IO] Input stream
49
+ # @param options [Hash] Decoding options
50
+ def initialize(input, _options = {})
51
+ @input = input
52
+ @bwt = Bwt.new
53
+ @mtf = Mtf.new
54
+ @rle = Rle.new
55
+ @huffman = Huffman.new
56
+ end
57
+
58
+ # Decode stream using BZip2 algorithm
59
+ #
60
+ # @return [String] Decoded data
61
+ def decode_stream
62
+ result = []
63
+
64
+ # Read and decode all blocks
65
+ loop do
66
+ block_data = decode_block
67
+ break unless block_data
68
+
69
+ result << block_data
70
+ end
71
+
72
+ result.join.b
73
+ end
74
+
75
+ private
76
+
77
+ # Decode single block
78
+ #
79
+ # @return [String, nil] Decoded block or nil if no more blocks
80
+ def decode_block
81
+ # Read block header
82
+ crc_bytes = @input.read(4)
83
+ return nil unless crc_bytes && crc_bytes.length == 4
84
+
85
+ expected_crc = crc_bytes.unpack1("N")
86
+ primary_index = @input.read(4).unpack1("N")
87
+ @input.read(4).unpack1("N")
88
+ rle_length = @input.read(4).unpack1("N")
89
+
90
+ # Read Huffman code table
91
+ codes = read_huffman_codes
92
+
93
+ # Read encoded data
94
+ encoded_length = @input.read(4).unpack1("N")
95
+ encoded_data = @input.read(encoded_length)
96
+
97
+ # Rebuild Huffman tree from codes
98
+ tree = rebuild_huffman_tree(codes)
99
+
100
+ # Decode Huffman
101
+ rle_data = @huffman.decode(encoded_data, tree, rle_length)
102
+
103
+ # Reverse RLE
104
+ mtf_data = @rle.decode(rle_data)
105
+
106
+ # Reverse MTF
107
+ bwt_data = @mtf.decode(mtf_data)
108
+
109
+ # Reverse BWT
110
+ original_data = @bwt.decode(bwt_data, primary_index)
111
+
112
+ # Verify CRC
113
+ actual_crc = Checksums::Crc32.calculate(original_data)
114
+ if actual_crc != expected_crc
115
+ raise "CRC mismatch: expected #{expected_crc}, " \
116
+ "got #{actual_crc}"
117
+ end
118
+
119
+ original_data
120
+ end
121
+
122
+ # Read Huffman code table from stream
123
+ #
124
+ # @return [Hash<Integer, String>] Symbol => binary code
125
+ def read_huffman_codes
126
+ codes = {}
127
+ code_count = @input.read(2).unpack1("n")
128
+
129
+ code_count.times do
130
+ symbol = @input.read(1).unpack1("C")
131
+ code_length = @input.read(1).unpack1("C")
132
+ codes[symbol] = code_length
133
+ end
134
+
135
+ codes
136
+ end
137
+
138
+ # Rebuild Huffman tree from code lengths
139
+ #
140
+ # Creates canonical Huffman codes and builds tree
141
+ #
142
+ # @param code_lengths [Hash<Integer, Integer>] Symbol => length
143
+ # @return [Huffman::Node] Root of Huffman tree
144
+ def rebuild_huffman_tree(code_lengths)
145
+ # Sort symbols by code length, then by symbol value
146
+ sorted_symbols = code_lengths.sort_by { |sym, len| [len, sym] }
147
+
148
+ # Generate canonical codes
149
+ codes = {}
150
+ code_value = 0
151
+ prev_length = 0
152
+
153
+ sorted_symbols.each do |symbol, length|
154
+ # Shift code value for new length
155
+ code_value <<= (length - prev_length)
156
+ codes[symbol] = format("%0#{length}b", code_value)
157
+ code_value += 1
158
+ prev_length = length
159
+ end
160
+
161
+ # Build tree from codes
162
+ build_tree_from_codes(codes)
163
+ end
164
+
165
+ # Build Huffman tree from code strings
166
+ #
167
+ # @param codes [Hash<Integer, String>] Symbol => binary code
168
+ # @return [Huffman::Node] Root node
169
+ def build_tree_from_codes(codes)
170
+ root = Huffman::Node.new(nil, 0)
171
+
172
+ codes.each do |symbol, code|
173
+ current = root
174
+
175
+ code.each_char do |bit|
176
+ if bit == "0"
177
+ current.left ||= Huffman::Node.new(nil, 0)
178
+ current = current.left
179
+ else
180
+ current.right ||= Huffman::Node.new(nil, 0)
181
+ current = current.right
182
+ end
183
+ end
184
+
185
+ current.symbol = symbol
186
+ end
187
+
188
+ root
189
+ end
190
+ end
191
+ end
192
+ end
193
+ end
@@ -0,0 +1,237 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "bwt"
24
+ require_relative "mtf"
25
+ require_relative "rle"
26
+ require_relative "huffman"
27
+ require_relative "../../checksums/crc32"
28
+
29
+ module Omnizip
30
+ module Algorithms
31
+ class BZip2 < Algorithm
32
+ # BZip2 Encoder
33
+ #
34
+ # Orchestrates the full BZip2 compression pipeline:
35
+ # 1. Block splitting (configurable block size)
36
+ # 2. Burrows-Wheeler Transform (BWT)
37
+ # 3. Move-to-Front Transform (MTF)
38
+ # 4. Run-Length Encoding (RLE)
39
+ # 5. Huffman Coding
40
+ # 6. CRC32 checksum calculation
41
+ #
42
+ # Each block is compressed independently for better parallelization
43
+ # potential and error recovery.
44
+ class Encoder
45
+ attr_reader :output, :block_size
46
+
47
+ # Block size constants (in bytes)
48
+ MIN_BLOCK_SIZE = 100_000 # 100KB
49
+ MAX_BLOCK_SIZE = 900_000 # 900KB
50
+ DEFAULT_BLOCK_SIZE = 900_000 # 900KB (level 9)
51
+
52
+ # Initialize encoder
53
+ #
54
+ # @param output [IO] Output stream
55
+ # @param options [Hash] Encoding options
56
+ # @option options [Integer] :block_size Block size in bytes
57
+ def initialize(output, options = {})
58
+ @output = output
59
+ @block_size = validate_block_size(
60
+ options[:block_size] || DEFAULT_BLOCK_SIZE,
61
+ )
62
+ @bwt = Bwt.new
63
+ @mtf = Mtf.new
64
+ @rle = Rle.new
65
+ @huffman = Huffman.new
66
+ end
67
+
68
+ # Encode stream using BZip2 algorithm
69
+ #
70
+ # @param input [String] Input data to encode
71
+ # @return [void]
72
+ def encode_stream(input)
73
+ return if input.empty?
74
+
75
+ # Split into blocks and encode each
76
+ blocks = split_into_blocks(input)
77
+ blocks.each { |block| encode_block(block) }
78
+ end
79
+
80
+ private
81
+
82
+ # Validate and clamp block size to valid range
83
+ #
84
+ # @param size [Integer] Requested block size
85
+ # @return [Integer] Validated block size
86
+ def validate_block_size(size)
87
+ size.clamp(MIN_BLOCK_SIZE, MAX_BLOCK_SIZE)
88
+ end
89
+
90
+ # Split input into blocks
91
+ #
92
+ # @param input [String] Input data
93
+ # @return [Array<String>] Array of blocks
94
+ def split_into_blocks(input)
95
+ blocks = []
96
+ offset = 0
97
+
98
+ while offset < input.length
99
+ block = input[offset, @block_size]
100
+ blocks << block if block && !block.empty?
101
+ offset += @block_size
102
+ end
103
+
104
+ blocks
105
+ end
106
+
107
+ # Encode single block through full pipeline
108
+ #
109
+ # @param block [String] Block data
110
+ # @return [void]
111
+ def encode_block(block)
112
+ # Calculate CRC of original data
113
+ crc = Checksums::Crc32.calculate(block)
114
+
115
+ # Apply BWT
116
+ bwt_data, primary_index = @bwt.encode(block)
117
+
118
+ # Apply MTF
119
+ mtf_data = @mtf.encode(bwt_data)
120
+
121
+ # Apply RLE
122
+ rle_data = @rle.encode(mtf_data)
123
+
124
+ # Build frequency table for Huffman
125
+ frequencies = build_frequency_table(rle_data)
126
+
127
+ # Build Huffman tree and generate codes
128
+ tree = @huffman.build_tree(frequencies)
129
+ codes = generate_canonical_codes(tree)
130
+
131
+ # Encode data with Huffman
132
+ encoded_data = @huffman.encode(rle_data, codes)
133
+
134
+ # Write block to output
135
+ write_block(crc, primary_index, block.length, codes,
136
+ encoded_data, rle_data.length)
137
+ end
138
+
139
+ # Build frequency table from data
140
+ #
141
+ # @param data [String] Input data
142
+ # @return [Hash<Integer, Integer>] Byte => frequency
143
+ def build_frequency_table(data)
144
+ freq = Hash.new(0)
145
+ data.each_byte { |byte| freq[byte] += 1 }
146
+ freq
147
+ end
148
+
149
+ # Generate canonical Huffman codes from tree
150
+ #
151
+ # @param tree [Huffman::Node] Huffman tree root
152
+ # @return [Hash<Integer, String>] Symbol => canonical code
153
+ def generate_canonical_codes(tree)
154
+ # Get standard codes first
155
+ standard_codes = @huffman.generate_codes(tree)
156
+
157
+ # Convert to code lengths
158
+ code_lengths = {}
159
+ standard_codes.each do |symbol, code|
160
+ code_lengths[symbol] = code.length
161
+ end
162
+
163
+ # Generate canonical codes from lengths
164
+ # Sort by (length, symbol) to ensure deterministic ordering
165
+ sorted_symbols = code_lengths.sort_by { |sym, len| [len, sym] }
166
+
167
+ canonical_codes = {}
168
+ code_value = 0
169
+ prev_length = 0
170
+
171
+ sorted_symbols.each do |symbol, length|
172
+ # Shift code value for new length
173
+ code_value <<= (length - prev_length)
174
+ canonical_codes[symbol] = format("%0#{length}b", code_value)
175
+ code_value += 1
176
+ prev_length = length
177
+ end
178
+
179
+ canonical_codes
180
+ end
181
+
182
+ # Write encoded block to output
183
+ #
184
+ # @param crc [Integer] CRC32 of original block
185
+ # @param primary_index [Integer] BWT primary index
186
+ # @param original_length [Integer] Original block length
187
+ # @param codes [Hash] Huffman codes
188
+ # @param encoded_data [String] Huffman-encoded data
189
+ # @param rle_length [Integer] Length after RLE
190
+ # @return [void]
191
+ def write_block(crc, primary_index, original_length, codes,
192
+ encoded_data, rle_length)
193
+ write_block_header(crc, primary_index, original_length, rle_length)
194
+ write_huffman_codes(codes)
195
+ write_encoded_data(encoded_data)
196
+ end
197
+
198
+ # Write block header
199
+ #
200
+ # @param crc [Integer] CRC32 checksum
201
+ # @param primary_index [Integer] BWT primary index
202
+ # @param original_length [Integer] Original block length
203
+ # @param rle_length [Integer] RLE length
204
+ # @return [void]
205
+ def write_block_header(crc, primary_index, original_length,
206
+ rle_length)
207
+ @output.write([crc].pack("N"))
208
+ @output.write([primary_index].pack("N"))
209
+ @output.write([original_length].pack("N"))
210
+ @output.write([rle_length].pack("N"))
211
+ end
212
+
213
+ # Write Huffman codes to output
214
+ #
215
+ # @param codes [Hash] Huffman codes
216
+ # @return [void]
217
+ def write_huffman_codes(codes)
218
+ @output.write([codes.size].pack("n"))
219
+
220
+ codes.each do |symbol, code|
221
+ @output.write([symbol].pack("C"))
222
+ @output.write([code.length].pack("C"))
223
+ end
224
+ end
225
+
226
+ # Write encoded data to output
227
+ #
228
+ # @param encoded_data [String] Huffman-encoded data
229
+ # @return [void]
230
+ def write_encoded_data(encoded_data)
231
+ @output.write([encoded_data.length].pack("N"))
232
+ @output.write(encoded_data)
233
+ end
234
+ end
235
+ end
236
+ end
237
+ end
@@ -0,0 +1,206 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ module Omnizip
24
+ module Algorithms
25
+ class BZip2 < Algorithm
26
+ # Huffman Coding for BZip2
27
+ #
28
+ # Implements canonical Huffman coding for the final compression
29
+ # stage of BZip2. Huffman coding assigns variable-length codes
30
+ # to symbols based on their frequencies, with more frequent
31
+ # symbols getting shorter codes.
32
+ #
33
+ # This implementation:
34
+ # 1. Builds a Huffman tree from symbol frequencies
35
+ # 2. Generates canonical codes for efficient decoding
36
+ # 3. Encodes data as a bit stream
37
+ # 4. Decodes bit streams back to symbols
38
+ class Huffman
39
+ # Huffman tree node
40
+ class Node
41
+ attr_accessor :symbol, :frequency, :left, :right
42
+
43
+ # Initialize node
44
+ #
45
+ # @param symbol [Integer, nil] Symbol (nil for internal nodes)
46
+ # @param frequency [Integer] Frequency/weight
47
+ def initialize(symbol, frequency)
48
+ @symbol = symbol
49
+ @frequency = frequency
50
+ @left = nil
51
+ @right = nil
52
+ end
53
+
54
+ # Check if node is a leaf
55
+ #
56
+ # @return [Boolean] True if leaf node
57
+ def leaf?
58
+ @left.nil? && @right.nil?
59
+ end
60
+ end
61
+
62
+ # Build Huffman tree from frequency table
63
+ #
64
+ # @param frequencies [Hash<Integer, Integer>] Symbol => frequency
65
+ # @return [Node, nil] Root node of Huffman tree
66
+ def build_tree(frequencies)
67
+ return nil if frequencies.empty?
68
+
69
+ # Create leaf nodes
70
+ nodes = frequencies.map do |symbol, freq|
71
+ Node.new(symbol, freq)
72
+ end
73
+
74
+ # Build tree bottom-up
75
+ while nodes.length > 1
76
+ # Sort by frequency
77
+ nodes.sort_by!(&:frequency)
78
+
79
+ # Take two lowest frequency nodes
80
+ left = nodes.shift
81
+ right = nodes.shift
82
+
83
+ # Create parent node
84
+ parent = Node.new(nil, left.frequency + right.frequency)
85
+ parent.left = left
86
+ parent.right = right
87
+
88
+ # Add back to nodes
89
+ nodes << parent
90
+ end
91
+
92
+ nodes.first
93
+ end
94
+
95
+ # Generate code table from Huffman tree
96
+ #
97
+ # @param root [Node] Root of Huffman tree
98
+ # @return [Hash<Integer, String>] Symbol => binary code
99
+ def generate_codes(root)
100
+ return {} if root.nil?
101
+
102
+ codes = {}
103
+ generate_codes_recursive(root, "", codes)
104
+ codes
105
+ end
106
+
107
+ # Encode data using Huffman codes
108
+ #
109
+ # @param data [String] Input data to encode
110
+ # @param codes [Hash<Integer, String>] Symbol => binary code
111
+ # @return [String] Encoded bit stream (as binary string)
112
+ def encode(data, codes)
113
+ return "".b if data.empty?
114
+
115
+ bits = []
116
+
117
+ data.each_byte do |byte|
118
+ code = codes[byte]
119
+ raise "No code for byte #{byte}" unless code
120
+
121
+ bits << code
122
+ end
123
+
124
+ bits_to_bytes(bits.join)
125
+ end
126
+
127
+ # Decode bit stream using Huffman tree
128
+ #
129
+ # @param bits [String] Encoded bit stream
130
+ # @param root [Node] Root of Huffman tree
131
+ # @param length [Integer] Expected output length
132
+ # @return [String] Decoded data
133
+ def decode(bits, root, length)
134
+ return "".b if bits.empty? || root.nil?
135
+
136
+ result = []
137
+ current = root
138
+ bit_string = bytes_to_bits(bits)
139
+ bit_index = 0
140
+
141
+ while result.length < length && bit_index < bit_string.length
142
+ bit = bit_string[bit_index]
143
+ bit_index += 1
144
+
145
+ # Navigate tree
146
+ current = (bit == "0" ? current.left : current.right)
147
+
148
+ # Check if we reached a leaf
149
+ if current.leaf?
150
+ result << current.symbol
151
+ current = root
152
+ end
153
+ end
154
+
155
+ result.pack("C*")
156
+ end
157
+
158
+ private
159
+
160
+ # Recursively generate codes for all symbols
161
+ #
162
+ # @param node [Node] Current node
163
+ # @param code [String] Current code path
164
+ # @param codes [Hash] Code table being built
165
+ # @return [void]
166
+ def generate_codes_recursive(node, code, codes)
167
+ return if node.nil?
168
+
169
+ if node.leaf?
170
+ codes[node.symbol] = code.empty? ? "0" : code
171
+ else
172
+ generate_codes_recursive(node.left, "#{code}0", codes)
173
+ generate_codes_recursive(node.right, "#{code}1", codes)
174
+ end
175
+ end
176
+
177
+ # Convert bit string to bytes
178
+ #
179
+ # @param bits [String] Bit string (e.g., "10110101")
180
+ # @return [String] Byte string
181
+ def bits_to_bytes(bits)
182
+ # Pad to multiple of 8
183
+ padding = (8 - (bits.length % 8)) % 8
184
+ bits += ("0" * padding)
185
+
186
+ # Convert to bytes
187
+ bytes = []
188
+ (0...bits.length).step(8) do |i|
189
+ byte_bits = bits[i, 8]
190
+ bytes << byte_bits.to_i(2)
191
+ end
192
+
193
+ bytes.pack("C*")
194
+ end
195
+
196
+ # Convert bytes to bit string
197
+ #
198
+ # @param bytes [String] Byte string
199
+ # @return [String] Bit string
200
+ def bytes_to_bits(bytes)
201
+ bytes.bytes.map { |b| format("%08b", b) }.join
202
+ end
203
+ end
204
+ end
205
+ end
206
+ end