omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "packet"
4
+ require_relative "main_packet"
5
+ require_relative "file_description_packet"
6
+ require_relative "ifsc_packet"
7
+ require_relative "recovery_slice_packet"
8
+ require_relative "creator_packet"
9
+
10
+ module Omnizip
11
+ module Parity
12
+ module Models
13
# Lookup table from PAR2 packet type identifiers to packet classes.
#
# Every entry pairs a 16-byte type identifier with the class that
# models packets of that type, so a packet read from a stream can be
# rebuilt as the matching subclass according to its type field.
class PacketRegistry
  @registry = {}

  class << self
    # Associate a packet class with a type identifier.
    #
    # An existing entry for the same identifier is overwritten.
    #
    # @param type_id [String] 16-byte type identifier
    # @param klass [Class] Packet class handling that type
    # @return [Class] the class that was stored
    # @raise [ArgumentError] if type_id is not exactly 16 bytes long
    def register(type_id, klass)
      if type_id.bytesize != 16
        raise ArgumentError,
              "Type ID must be 16 bytes, got #{type_id.bytesize}"
      end

      @registry.store(type_id, klass)
    end

    # Look up the packet class stored for a type identifier.
    #
    # @param type_id [String] 16-byte type identifier
    # @return [Class, nil] Packet class, or nil when nothing is registered
    def get(type_id)
      @registry.fetch(type_id, nil)
    end

    # Tell whether a type identifier has an entry.
    #
    # @param type_id [String] 16-byte type identifier
    # @return [Boolean] true when the identifier is registered
    def registered?(type_id)
      @registry.include?(type_id)
    end

    # List every registered type identifier.
    #
    # @return [Array<String>] the registered 16-byte identifiers
    def types
      @registry.keys
    end

    # Read one packet from a stream as the most specific known class.
    #
    # The generic packet is read first via Packet.read_from; its type
    # field then selects the registered subclass, which is constructed
    # from the same header fields and asked to parse the body.
    #
    # @param io [IO] Input stream
    # @return [Packet, nil] an instance of the registered subclass, the
    #   generic Packet when the type is not registered, or nil when
    #   Packet.read_from yields no packet
    def read_packet(io)
      generic = Packet.read_from(io)
      return nil unless generic

      handler = get(generic.type)
      # Unregistered types are handed back as the generic packet
      return generic unless handler

      header_fields = {
        magic: generic.magic,
        length: generic.length,
        packet_hash: generic.packet_hash,
        set_id: generic.set_id,
        type: generic.type,
      }

      handler.new(**header_fields).tap do |typed|
        # body_data is assigned after construction rather than passed
        # to the constructor, then decoded into structured fields
        typed.body_data = generic.body_data
        typed.parse_body
      end
    end

    # Drop every registration (primarily for testing)
    def clear!
      @registry.clear
    end
  end

  # Standard PAR2 packet types, registered when the class is loaded
  [
    MainPacket,
    FileDescriptionPacket,
    IfscPacket,
    RecoverySlicePacket,
    CreatorPacket,
  ].each { |packet_class| register(packet_class::PACKET_TYPE, packet_class) }
end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "packet"
4
+
5
+ module Omnizip
6
+ module Parity
7
+ module Models
8
+ # PAR2 Recovery Slice packet
9
+ #
10
+ # Contains Reed-Solomon encoded recovery data for error correction.
11
+ #
12
+ # Body structure:
13
+ # - exponent (4 bytes, L<): Recovery block exponent/index
14
+ # - recovery_data (variable): Reed-Solomon encoded block data
15
class RecoverySlicePacket < Packet
  # Packet type identifier
  PACKET_TYPE = "PAR 2.0\x00RecvSlic"

  # Width in bytes of the exponent field that prefixes the body
  EXPONENT_SIZE = 4

  # Recovery block exponent (4 bytes)
  attribute :exponent, :integer

  # Recovery data (variable length)
  attribute :recovery_data, :string, default: -> { "" }

  # Initialize recovery slice packet
  #
  # The packet type is always forced to PACKET_TYPE, whatever the
  # caller passed in +attributes+.
  #
  # @param attributes [Hash] Packet attributes
  def initialize(**attributes)
    super
    self.type = PACKET_TYPE
  end

  # Parse body data into attributes
  #
  # Body format:
  # - exponent: 4 bytes (L<)
  # - recovery_data: remainder
  #
  # Does nothing when the body is missing or shorter than the exponent
  # field. Slicing is done by byte offset (byteslice) so the result does
  # not depend on the encoding tag of body_data.
  #
  # @return [void]
  def parse_body
    return if body_data.nil? || body_data.bytesize < EXPONENT_SIZE

    # Exponent: little-endian unsigned 32-bit
    self.exponent = body_data.byteslice(0, EXPONENT_SIZE).unpack1("L<")

    # Everything after the exponent is recovery data ("" when absent)
    self.recovery_data = body_data.byteslice(EXPONENT_SIZE..)
  end

  # Build body data from attributes
  #
  # Serializes exponent followed by recovery_data into a binary string
  # and stores it in body_data. The buffer and the appended data are
  # binary-encoded so the concatenation cannot fail on mismatched
  # encoding tags.
  #
  # @return [String] Binary body data
  def build_body
    data = String.new(encoding: Encoding::BINARY)

    # Write exponent (4 bytes, little-endian)
    data << [exponent].pack("L<")

    # Write recovery data as raw bytes
    data << recovery_data.b

    self.body_data = data
  end

  # Get size of recovery data
  #
  # @return [Integer] Recovery data size in bytes
  def data_size
    recovery_data.bytesize
  end
end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,531 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+ require "fileutils"
5
+ require_relative "reed_solomon_encoder"
6
+
7
+ module Omnizip
8
+ module Parity
9
+ # PAR2 parity archive creator
10
+ #
11
+ # Creates PAR2 recovery files using Reed-Solomon error correction.
12
+ # PAR2 files allow recovery of corrupted or missing data blocks.
13
+ #
14
+ # @example Create PAR2 files for an archive
15
+ # creator = Par2Creator.new(redundancy: 10, block_size: 16384)
16
+ # creator.add_file('important.zip')
17
+ # creator.create('important')
18
+ # # Creates: important.par2, important.vol00+01.par2, etc.
19
+ #
20
+ # @example Multiple files with custom settings
21
+ # creator = Par2Creator.new(
22
+ # redundancy: 5,
23
+ # block_size: 32768,
24
+ # progress: ->(pct, message) { puts "Progress: #{pct}% - #{message}" }
25
+ # )
26
+ # creator.add_file('file1.dat')
27
+ # creator.add_file('file2.dat')
28
+ # creator.create('backup')
29
class Par2Creator
  # PAR2 packet signature
  PACKET_SIGNATURE = "PAR2\x00PKT".b.freeze

  # Packet type identifiers (16 bytes each, binary like the signature)
  PACKET_TYPE_MAIN = "PAR 2.0\x00Main\x00\x00\x00\x00".b.freeze
  PACKET_TYPE_FILE_DESC = "PAR 2.0\x00FileDesc".b.freeze
  PACKET_TYPE_IFSC = "PAR 2.0\x00IFSC\x00\x00\x00\x00".b.freeze
  PACKET_TYPE_RECOVERY = "PAR 2.0\x00RecvSlic".b.freeze

  # Default block size (16KB)
  DEFAULT_BLOCK_SIZE = 16_384

  # Number of leading file bytes covered by the "first 16KB" hash
  HASH_16K_SIZE = 16_384

  # Packet header size: 8 bytes magic + 8 bytes length + 16 bytes hash
  # + 16 bytes set_id + 16 bytes type
  PACKET_HEADER_SIZE = 64

  # @return [Integer] Block size in bytes
  attr_reader :block_size

  # @return [Integer] Redundancy percentage (0-100)
  attr_reader :redundancy

  # @return [Array<FileInfo>] Files to protect
  attr_reader :files

  # @return [Proc, nil] Progress callback
  attr_reader :progress_callback

  # File information structure
  FileInfo = Struct.new(
    :path,      # File path
    :file_id,   # MD5-based file identifier
    :hash_16k,  # MD5 of first 16KB
    :hash_full, # MD5 of full file
    :size,      # File size
    :blocks,    # Array of file blocks
    keyword_init: true,
  )

  # Initialize PAR2 creator
  #
  # @param redundancy [Integer] Redundancy percentage (0-100)
  # @param block_size [Integer] Block size in bytes (positive, multiple of 4)
  # @param progress [Proc, nil] Callback invoked as
  #   +progress.call(percent, message)+
  # @raise [ArgumentError] if redundancy or block_size is out of range
  def initialize(redundancy: 5, block_size: DEFAULT_BLOCK_SIZE,
                 progress: nil)
    @redundancy = validate_redundancy(redundancy)
    @block_size = validate_block_size(block_size)
    @progress_callback = progress
    @files = []
    @set_id = generate_set_id
  end

  # Add file to PAR2 set
  #
  # The file is read and hashed immediately; its blocks are held in
  # memory until #create is called.
  #
  # @param file_path [String] Path to file
  # @raise [ArgumentError] if file doesn't exist
  def add_file(file_path)
    raise ArgumentError, "File not found: #{file_path}" unless
      File.exist?(file_path)

    @files << analyze_file(file_path)
  end

  # Create PAR2 recovery files
  #
  # @param base_name [String] Base name for PAR2 files
  # @return [Array<String>] Paths to created PAR2 files (index file first)
  # @raise [RuntimeError] if no files have been added
  def create(base_name)
    validate_files!

    # Every consumer (main packet, file descriptions, IFSC packets and
    # recovery blocks) must see the files in the same order, so sort once
    # by file_id before anything is written.
    @files.sort_by!(&:file_id)

    total_blocks = calculate_total_blocks
    recovery_blocks = calculate_recovery_blocks(total_blocks)

    report_progress(0, "Initializing PAR2 creation")

    index_file = create_index_file(base_name)
    volume_files = create_recovery_volumes(
      base_name,
      recovery_blocks,
      total_blocks,
    )

    report_progress(100, "PAR2 creation complete")

    [index_file] + volume_files
  end

  private

  # Validate redundancy percentage
  #
  # @param redundancy [Integer] Redundancy value
  # @return [Integer] Validated redundancy
  # @raise [ArgumentError] if outside 0..100
  def validate_redundancy(redundancy)
    unless redundancy.between?(0, 100)
      raise ArgumentError, "Redundancy must be 0-100, got #{redundancy}"
    end

    redundancy
  end

  # Validate block size
  #
  # @param block_size [Integer] Block size value
  # @return [Integer] Validated block size
  # @raise [ArgumentError] if not positive or not a multiple of 4
  def validate_block_size(block_size)
    unless block_size.positive? && (block_size % 4).zero?
      raise ArgumentError,
            "Block size must be positive and divisible by 4, got #{block_size}"
    end

    block_size
  end

  # Validate that files have been added
  #
  # @raise [RuntimeError] if no files added
  def validate_files!
    raise "No files added to PAR2 set" if @files.empty?
  end

  # Generate unique set ID for this PAR2 set
  #
  # @return [String] 16-byte set ID
  def generate_set_id
    Digest::MD5.digest("#{Time.now.to_f}#{rand}")
  end

  # Allocate an empty, mutable, binary-encoded string.
  #
  # Packet payloads mix raw digests with text; building them in a binary
  # buffer keeps String#<< from raising Encoding::CompatibilityError.
  #
  # @return [String] empty ASCII-8BIT string
  def binary_buffer
    String.new(encoding: Encoding::BINARY)
  end

  # Analyze file and calculate hashes/blocks
  #
  # The file ID is the MD5 of "<basename>\0<size>".
  # NOTE(review): the PAR 2.0 specification appears to derive the File ID
  # from the 16k hash, length and name instead - confirm against the
  # verifier before changing.
  #
  # @param file_path [String] Path to file
  # @return [FileInfo] File information
  def analyze_file(file_path)
    File.open(file_path, "rb") do |io|
      file_size = io.size

      # Hash of the first 16KB (empty string for an empty file)
      hash_16k = Digest::MD5.digest(io.read(HASH_16K_SIZE) || "")

      # Full-file hash is streamed from the path, independent of io
      hash_full = Digest::MD5.file(file_path).digest

      file_id = Digest::MD5.digest(
        "#{File.basename(file_path)}\x00#{file_size}",
      )

      # Go back to the start: the 16KB read above moved the cursor
      io.rewind
      blocks = read_file_blocks(io)

      FileInfo.new(
        path: file_path,
        file_id: file_id,
        hash_16k: hash_16k,
        hash_full: hash_full,
        size: file_size,
        blocks: blocks,
      )
    end
  end

  # Read file data into blocks
  #
  # The final block is zero-padded up to the block size.
  #
  # @param io [IO] File IO object
  # @return [Array<String>] File blocks, each exactly @block_size bytes
  def read_file_blocks(io)
    blocks = []
    while (chunk = io.read(@block_size))
      shortfall = @block_size - chunk.bytesize
      chunk << ("\x00" * shortfall) if shortfall.positive?
      blocks << chunk
    end
    blocks
  end

  # Calculate total number of data blocks
  #
  # @return [Integer] Total blocks across all files
  def calculate_total_blocks
    @files.sum { |f| f.blocks.size }
  end

  # Calculate number of recovery blocks needed
  #
  # @param total_blocks [Integer] Total data blocks
  # @return [Integer] Number of recovery blocks (rounded up)
  def calculate_recovery_blocks(total_blocks)
    (total_blocks * @redundancy / 100.0).ceil
  end

  # Create main PAR2 index file
  #
  # @param base_name [String] Base name for file
  # @return [String] Path to created file
  def create_index_file(base_name)
    file_path = "#{base_name}.par2"

    # Ensure directory exists
    FileUtils.mkdir_p(File.dirname(file_path))

    File.open(file_path, "wb") do |io|
      write_main_packet(io)
      write_file_description_packets(io)
      write_ifsc_packets(io)
    end

    file_path
  end

  # Write main packet
  #
  # @param io [IO] Output IO
  def write_main_packet(io)
    write_packet(io, PACKET_TYPE_MAIN, build_main_packet_data)
  end

  # Build main packet data
  #
  # @return [String] Packet data: block size followed by all file IDs
  def build_main_packet_data
    data = binary_buffer
    data << [@block_size].pack("Q<") # Block size (8 bytes)

    # File IDs of all files in set (16 bytes each)
    @files.each { |file_info| data << file_info.file_id }

    data
  end

  # Write file description packets
  #
  # @param io [IO] Output IO
  def write_file_description_packets(io)
    @files.each do |file_info|
      write_packet(io, PACKET_TYPE_FILE_DESC,
                   build_file_desc_packet_data(file_info))
    end
  end

  # Build file description packet data
  #
  # @param file_info [FileInfo] File information
  # @return [String] Packet data
  def build_file_desc_packet_data(file_info)
    data = binary_buffer
    data << file_info.file_id           # File ID (16 bytes)
    data << file_info.hash_full         # File hash (16 bytes)
    data << file_info.hash_16k          # Hash of first 16K (16 bytes)
    data << [file_info.size].pack("Q<") # File length (8 bytes)

    # Filename (null-terminated, padded to multiple of 4). Taken as raw
    # bytes so non-ASCII names can be appended after the binary digests.
    filename = File.basename(file_info.path).b
    data << filename
    data << "\x00"
    padding = (4 - ((filename.bytesize + 1) % 4)) % 4
    data << ("\x00" * padding) if padding.positive?

    data
  end

  # Write IFSC packets (Input File Slice Checksum)
  #
  # One packet is written per block of every file.
  #
  # @param io [IO] Output IO
  def write_ifsc_packets(io)
    @files.each do |file_info|
      file_info.blocks.each do |block|
        write_packet(io, PACKET_TYPE_IFSC,
                     build_ifsc_packet_data(file_info, block))
      end
    end
  end

  # Build IFSC packet data
  #
  # @param file_info [FileInfo] File information
  # @param block [String] Block data
  # @return [String] Packet data
  def build_ifsc_packet_data(file_info, block)
    data = binary_buffer
    data << file_info.file_id            # File ID (16 bytes)
    data << Digest::MD5.digest(block)    # Block hash (16 bytes)
    data << calculate_block_crc32(block) # Block CRC32 (4 bytes)
    data
  end

  # Calculate CRC32 of block
  #
  # @param block [String] Block data
  # @return [String] Packed CRC32 (4 bytes, little-endian)
  def calculate_block_crc32(block)
    # zlib is loaded on first use; this file does not require it up front
    require "zlib"
    [Zlib.crc32(block)].pack("L<")
  end

  # Create recovery volume files
  #
  # @param base_name [String] Base name for files
  # @param num_recovery [Integer] Number of recovery blocks
  # @param _total_blocks [Integer] Total data blocks (currently unused)
  # @return [Array<String>] Paths to created files
  def create_recovery_volumes(base_name, num_recovery, _total_blocks)
    return [] if num_recovery.zero?

    # Data blocks of all files, in the file_id order established by #create
    all_data_blocks = @files.flat_map(&:blocks)

    # Sequential exponents starting from 0, one per recovery block
    exponents = (0...num_recovery).to_a

    report_progress(10,
                    "Generating recovery blocks (this may take a while)")

    parity_blocks = ReedSolomonEncoder.encode(
      all_data_blocks,
      @block_size,
      exponents,
    )

    report_progress(60, "Writing recovery volume files")

    volume_files = write_recovery_volumes(base_name, parity_blocks)

    report_progress(90, "Finalizing recovery volumes")

    volume_files
  end

  # Write recovery volumes using PAR2 naming scheme
  #
  # Volumes grow by powers of two and are named
  # "<base>.vol<first block>+<block count>.par2":
  #   vol00+01.par2 = 1 block
  #   vol01+02.par2 = 2 blocks
  #   vol03+04.par2 = 4 blocks
  #   vol07+08.par2 = 8 blocks
  #   etc.
  # The last volume holds whatever blocks remain.
  #
  # @param base_name [String] Base name
  # @param parity_blocks [Array<String>] Parity blocks
  # @return [Array<String>] Created file paths
  def write_recovery_volumes(base_name, parity_blocks)
    volume_files = []
    current_block = 0
    volume_num = 0

    while current_block < parity_blocks.size
      # Powers of 2, capped at the number of blocks still unwritten
      blocks_in_volume = [
        2**volume_num,
        parity_blocks.size - current_block,
      ].min

      file_path = format(
        "%s.vol%02d+%02d.par2",
        base_name,
        current_block,
        blocks_in_volume,
      )

      write_recovery_volume(
        file_path,
        parity_blocks[current_block, blocks_in_volume],
        current_block,
      )

      volume_files << file_path
      current_block += blocks_in_volume
      volume_num += 1
    end

    volume_files
  end

  # Write single recovery volume file
  #
  # @param file_path [String] Output file path
  # @param blocks [Array<String>] Recovery blocks
  # @param start_exponent [Integer] Exponent of the first block
  def write_recovery_volume(file_path, blocks, start_exponent)
    # Ensure directory exists
    FileUtils.mkdir_p(File.dirname(file_path))

    File.open(file_path, "wb") do |io|
      # Write main packet (same as index file)
      write_main_packet(io)

      # Write recovery slice packets with consecutive exponents
      blocks.each_with_index do |block, idx|
        packet_data = build_recovery_packet_data(block, start_exponent + idx)
        write_packet(io, PACKET_TYPE_RECOVERY, packet_data)
      end
    end
  end

  # Build recovery slice packet data
  #
  # @param block [String] Recovery block data
  # @param exponent [Integer] Recovery exponent
  # @return [String] Packet data
  def build_recovery_packet_data(block, exponent)
    data = binary_buffer
    data << [exponent].pack("L<") # Exponent (4 bytes)
    data << block                 # Recovery data
    data
  end

  # Write packet with header
  #
  # Layout: magic (8) | length (8) | hash (16) | set_id (16) | type (16)
  # | data. The hash is the MD5 of everything that follows it.
  #
  # @param io [IO] Output IO
  # @param packet_type [String] Packet type identifier
  # @param packet_data [String] Packet data
  def write_packet(io, packet_type, packet_data)
    # Hashed portion: set ID, type and payload
    packet_body = binary_buffer
    packet_body << @set_id     # Recovery Set ID (16 bytes)
    packet_body << packet_type # Packet type (16 bytes)
    packet_body << packet_data # Packet data

    packet_length = PACKET_HEADER_SIZE + packet_data.bytesize

    packet = binary_buffer
    packet << PACKET_SIGNATURE                # Magic (8 bytes)
    packet << [packet_length].pack("Q<")      # Length (8 bytes)
    packet << Digest::MD5.digest(packet_body) # Hash (16 bytes)
    packet << packet_body                     # Body

    io.write(packet)
  end

  # Report progress if callback provided
  #
  # @param percent [Integer] Completion percentage
  # @param message [String] Progress message
  def report_progress(percent, message)
    @progress_callback&.call(percent, message)
  end
end
530
+ end
531
+ end