omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,123 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "filter_base"
20
+ require_relative "bcj_x86"
21
+ require_relative "bcj_arm"
22
+ require_relative "bcj_arm64"
23
+ require_relative "bcj_ia64"
24
+ require_relative "bcj_ppc"
25
+ require_relative "bcj_sparc"
26
+ require_relative "bcj2"
27
+ require_relative "delta"
28
+ require_relative "bcj" # Unified BCJ filter (Task 2)
29
+ require_relative "../filter_registry"
30
+
31
+ module Omnizip
32
+ module Filters
33
# Central registration point for the preprocessing filters.
#
# Every method here forwards to Omnizip::FilterRegistry.register_with_formats
# with the list of archive formats the filter is registered for.
module Registry
  # Register the BCJ (branch/call/jump) executable filters.
  #
  # The per-architecture filters are registered for 7z and, unless flagged
  # otherwise, for XZ as well (ARM64 is registered for 7z only). The unified
  # :bcj filter is registered last, for 7z only.
  #
  # @return [void]
  def self.register_bcj_filters
    # Hyphenated names follow the existing naming convention.
    per_architecture = [
      [:"bcj-x86", BcjX86, { architecture: :x86 }],
      [:"bcj-arm", BcjArm, { architecture: :arm }],
      [:"bcj-arm64", BcjArm64, { architecture: :arm64, xz_supported: false }],
      [:"bcj-ia64", BcjIa64, { architecture: :ia64 }],
      [:"bcj-ppc", BcjPpc, { architecture: :powerpc }],
      [:"bcj-sparc", BcjSparc, { architecture: :sparc }],
    ]
    per_architecture.each do |filter_name, filter_class, options|
      register_bcj_filter(filter_name, filter_class, **options)
    end

    # Unified BCJ filter: registered as plain :bcj (no architecture suffix)
    # and only for 7z for now, since it takes an architecture parameter.
    Omnizip::FilterRegistry.register_with_formats(
      :bcj,
      Omnizip::Filters::BCJ,
      formats: [:seven_zip],
    )
  end

  # Register a single BCJ filter for the formats that support it.
  #
  # @param name [Symbol] Filter name identifier
  # @param filter_class [Class] Filter class to register
  # @param architecture [Symbol] Target architecture (accepted for call-site
  #   readability; not forwarded to the registry)
  # @param xz_supported [Boolean] Whether the filter is also registered for XZ
  # @return [void]
  def self.register_bcj_filter(name, filter_class, architecture:,
                               xz_supported: true)
    supported = xz_supported ? %i[seven_zip xz] : [:seven_zip]
    Omnizip::FilterRegistry.register_with_formats(name, filter_class,
                                                  formats: supported)
  end

  # Register the Delta filter for both 7z and XZ.
  #
  # @return [void]
  def self.register_delta_filter
    Omnizip::FilterRegistry.register_with_formats(
      :delta,
      Delta,
      formats: %i[seven_zip xz],
    )
  end

  # Register the BCJ2 filter (4-stream BCJ variant) for 7z only.
  #
  # @return [void]
  def self.register_bcj2_filter
    Omnizip::FilterRegistry.register_with_formats(
      :bcj2,
      Bcj2,
      formats: [:seven_zip],
    )
  end

  # Register every filter known to this module: the BCJ family first,
  # then Delta, then BCJ2.
  #
  # @return [void]
  def self.register_all
    register_bcj_filters
    register_delta_filter
    register_bcj2_filter
  end
end
119
+ end
120
+ end
121
+
122
+ # Auto-register all filters on load
123
+ Omnizip::Filters::Registry.register_all
@@ -0,0 +1,258 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2025 Ribose Inc.
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a
7
+ # copy of this software and associated documentation files (the "Software"),
8
+ # to deal in the Software without restriction, including without limitation
9
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
+ # and/or sell copies of the Software, and to permit persons to whom the
11
+ # Software is furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22
+ # DEALINGS IN THE SOFTWARE.
23
+ #
24
+ # XZ Utils Delta filter implementation.
25
+ #
26
+ # This is the XZ Utils Delta filter (filter ID 0x03), which is DIFFERENT
27
+ # from the 7-Zip Delta filter. Both compute byte-wise differences but use
28
+ # different algorithms:
29
+ #
30
+ # - 7-Zip Delta: Simple forward difference (new[i] = old[i] - old[i-distance])
31
+ # - XZ Utils Delta: Uses a 256-byte circular history buffer
32
+ #
33
+ # Reference: XZ Utils source tree, src/liblzma/delta/
34
+ #
35
+ # Algorithm summary:
36
+ # - Maintains a 256-byte circular history buffer
37
+ # - Encoder: out[i] = in[i] - history[(distance + pos) & 0xFF]
38
+ # - Decoder: out[i] = in[i] + history[(distance + pos) & 0xFF]
39
+ # - history[pos & 0xFF] = processed_byte (updated in both encode/decode)
40
+ # - pos decrements each byte, wrapping via & 0xFF
41
+ #
42
+ # The distance parameter (1-256) determines how far back in the history
43
+ # to look for the delta reference value.
44
+
45
+ module Omnizip
46
+ module Filters
47
# XZ Utils Delta filter.
#
# This filter computes byte-wise differences using a 256-byte circular
# history buffer. It is particularly effective for:
# - Stereo audio (distance=4 for 16-bit samples)
# - RGB images (distance=3)
# - RGBA images (distance=4)
# - Multi-channel data with regular patterns
#
# This is DIFFERENT from the 7-Zip Delta filter which uses simple
# forward differences without a history buffer.
#
# The filter is stateful: the history buffer and position carry over
# between successive #encode / #decode calls on the same instance, so a
# stream may be processed in chunks. Call #reset to start a new stream.
#
# Reference: XZ Utils delta_encoder.c, delta_decoder.c
class XzDeltaFilter
  # Filter ID for XZ format
  FILTER_ID = 0x03

  # Minimum distance value (XZ Utils LZMA_DELTA_DIST_MIN)
  DELTA_DIST_MIN = 1

  # Maximum distance value (XZ Utils LZMA_DELTA_DIST_MAX)
  DELTA_DIST_MAX = 256

  # History buffer size (always 256 bytes in XZ Utils)
  HISTORY_SIZE = 256

  # Delta type (only BYTE is supported in XZ Utils)
  DELTA_TYPE_BYTE = 0

  attr_reader :distance

  # Initialize the Delta filter.
  #
  # @param distance [Integer] Byte distance for delta calculation (1-256)
  # @raise [ArgumentError] If distance is invalid
  def initialize(distance = DELTA_DIST_MIN)
    validate_distance(distance)
    @distance = distance
    # Initial state matches XZ Utils lzma_delta_coder_init: zeroed history,
    # position 0.
    reset
  end

  # Encode (preprocess) data by computing forward differences.
  #
  # For each byte:
  #   tmp = history[(distance + pos) & 0xFF]
  #   history[pos & 0xFF] = in[i]
  #   out[i] = in[i] - tmp (mod 256)
  #   pos--
  #
  # Reference: XZ Utils delta_encoder.c:copy_and_encode
  #
  # @param data [String] Binary data to encode
  # @return [String] Encoded binary data (new binary string; input untouched)
  def encode(data)
    output = data.b
    data.each_byte.with_index do |byte, index|
      # Byte seen `distance` positions earlier in the stream.
      reference = @history.getbyte((@distance + @pos) & 0xFF)

      # The encoder remembers the ORIGINAL byte.
      @history.setbyte(@pos, byte)
      output.setbyte(index, (byte - reference) & 0xFF)

      # Position walks backward through the ring buffer.
      @pos = (@pos - 1) & 0xFF
    end

    output
  end

  # Decode (postprocess) data by restoring from differences.
  #
  # For each byte:
  #   buffer[i] += history[(distance + pos) & 0xFF] (mod 256)
  #   history[pos & 0xFF] = buffer[i]
  #   pos--
  #
  # Reference: XZ Utils delta_decoder.c:decode_buffer
  #
  # @param data [String] Binary data to decode
  # @return [String] Decoded binary data (new binary string; input untouched)
  def decode(data)
    output = data.b
    data.each_byte.with_index do |byte, index|
      reference = @history.getbyte((@distance + @pos) & 0xFF)
      restored = (byte + reference) & 0xFF

      output.setbyte(index, restored)
      # The decoder remembers the RESTORED byte, mirroring the encoder.
      @history.setbyte(@pos, restored)

      @pos = (@pos - 1) & 0xFF
    end

    output
  end

  # Reset the filter state.
  #
  # This clears the history buffer and resets position to 0.
  # Used when initializing a new filter chain.
  #
  # @return [void]
  def reset
    @pos = 0
    @history = ("\x00" * HISTORY_SIZE).b
  end

  class << self
    # Validate a distance value.
    #
    # Shared by the constructor and .encode_properties so the rules and
    # error messages stay in one place.
    #
    # @param dist [Integer] Distance value to validate
    # @raise [ArgumentError] If dist is not an Integer in 1..256
    # @return [void]
    def validate_distance(dist)
      unless dist.is_a?(Integer)
        raise ArgumentError, "Distance must be an integer, got #{dist.class}"
      end

      return if dist.between?(DELTA_DIST_MIN, DELTA_DIST_MAX)

      raise ArgumentError,
            "Distance must be between #{DELTA_DIST_MIN} and #{DELTA_DIST_MAX}, got #{dist}"
    end

    # Decode properties byte to get distance.
    #
    # XZ Utils encodes distance as: props[0] = dist - 1
    # So we decode as: dist = props[0] + 1
    #
    # Reference: XZ Utils delta_decoder.c:lzma_delta_props_decode
    #
    # @param properties [String] Properties byte (1 byte)
    # @return [Integer] Distance value (1-256)
    # @raise [ArgumentError] If properties is not a 1-byte String
    def decode_properties(properties)
      unless properties.is_a?(String) && properties.bytesize == 1
        # Only Strings respond to #bytesize; report the class for anything
        # else so the caller always gets an ArgumentError.
        got = properties.is_a?(String) ? properties.bytesize : properties.class
        raise ArgumentError,
              "Delta filter requires exactly 1 property byte, got #{got}"
      end

      # A single byte (0..255) plus DELTA_DIST_MIN always lands in 1..256,
      # so no further range check is needed.
      properties.getbyte(0) + DELTA_DIST_MIN
    end

    # Encode distance to properties byte.
    #
    # XZ Utils encodes distance as: props[0] = dist - 1
    #
    # Reference: XZ Utils delta_encoder.c:lzma_delta_props_encode
    #
    # @param distance [Integer] Distance value (1-256)
    # @return [String] Properties byte (1 byte)
    # @raise [ArgumentError] If distance is invalid
    def encode_properties(distance)
      validate_distance(distance)
      [distance - DELTA_DIST_MIN].pack("C")
    end

    # Get metadata about this filter.
    #
    # @return [Hash] Filter metadata
    def metadata
      {
        name: "XZ Delta",
        description: "XZ Utils Delta filter with 256-byte circular history buffer",
        filter_id: FILTER_ID,
        typical_usage: "WAV audio (distance=4), BMP images (distance=3), " \
                       "multi-channel data with regular patterns",
      }
    end
  end

  private

  # Validate distance parameter (delegates to the class-level validator).
  #
  # @param dist [Integer] Distance value to validate
  # @raise [ArgumentError] If distance is invalid
  # @return [void]
  def validate_distance(dist)
    self.class.validate_distance(dist)
  end
end
257
+ end
258
+ end
@@ -0,0 +1,162 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
# Detects archive format from file signature/magic bytes
#
# This class identifies archive formats based on their file signatures.
# It distinguishes between XZ Utils format (.xz) and 7-Zip format (.7z)
# which are DIFFERENT implementations of LZMA/LZMA2 compression.
#
# @example Basic usage
#   format = Omnizip::FormatDetector.detect("archive.xz")
#   case format
#   when :xz
#     # Use XZ Utils implementation
#   when :seven_zip
#     # Use 7-Zip implementation
#   end
#
class FormatDetector
  # XZ Utils format signature: "\xFD7zXZ\x00"
  XZ_SIGNATURE = [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00].freeze

  # 7-Zip format signature: "7z\xBC\xAF\x27\x1C"
  SEVEN_ZIP_SIGNATURE = [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C].freeze

  # RAR5 format signature: "Rar!\x1A\x07\x01\x00"
  RAR5_SIGNATURE = [0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00].freeze

  # RAR4 format signature: "Rar!\x1A\x07\x00"
  RAR4_SIGNATURE = [0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00].freeze

  # ZIP format signature: "PK\x03\x04"
  ZIP_SIGNATURE = [0x50, 0x4B, 0x03, 0x04].freeze

  # GZIP format signature: "\x1F\x8B"
  GZIP_SIGNATURE = [0x1F, 0x8B].freeze

  # BZIP2 format signature: "BZ"
  BZIP2_SIGNATURE = [0x42, 0x5A].freeze

  # Format identifier => magic bytes, in the order they are tried.
  SIGNATURES = {
    xz: XZ_SIGNATURE,
    seven_zip: SEVEN_ZIP_SIGNATURE,
    rar5: RAR5_SIGNATURE,
    rar4: RAR4_SIGNATURE,
    zip: ZIP_SIGNATURE,
    gzip: GZIP_SIGNATURE,
    bzip2: BZIP2_SIGNATURE,
  }.freeze

  # Number of leading bytes read from the file for detection.
  HEADER_READ_SIZE = 16

  # Size of the LZMA_Alone header (1 props + 4 dict size + 8 uncompressed size).
  LZMA_ALONE_HEADER_SIZE = 13

  # Largest valid LZMA properties byte: (9 * 5 * 5) - 1.
  LZMA_ALONE_MAX_PROPS = 224

  # Largest dictionary size accepted by the LZMA_Alone heuristic (1 GiB).
  LZMA_ALONE_MAX_DICT_SIZE = 1 << 30

  # Detect archive format from file path
  #
  # Only regular files are inspected; a missing path or a directory
  # yields nil rather than an I/O error.
  #
  # @param file_path [String] Path to the archive file
  # @return [Symbol, nil] Format identifier (:xz, :seven_zip, :rar5, :rar4,
  #   :zip, :gzip, :bzip2, :lzma_alone) or nil if unknown
  def self.detect(file_path)
    return nil unless File.file?(file_path)

    header = File.binread(file_path, HEADER_READ_SIZE)
    return nil if header.nil? || header.empty?

    bytes = header.bytes
    format, = SIGNATURES.find do |_name, signature|
      # A header shorter than the signature can never equal it.
      bytes.first(signature.size) == signature
    end

    # No magic number matched: fall back to the LZMA_Alone heuristic
    # (13-byte header with properties).
    format || detect_lzma_alone(bytes)
  end

  # Check if file is XZ Utils format
  #
  # @param file_path [String] Path to the file
  # @return [Boolean] true if XZ format
  def self.xz?(file_path)
    detect(file_path) == :xz
  end

  # Check if file is 7-Zip format
  #
  # @param file_path [String] Path to the file
  # @return [Boolean] true if 7-Zip format
  def self.seven_zip?(file_path)
    detect(file_path) == :seven_zip
  end

  # Check if file is RAR5 format
  #
  # @param file_path [String] Path to the file
  # @return [Boolean] true if RAR5 format
  def self.rar5?(file_path)
    detect(file_path) == :rar5
  end

  # Check if file is RAR4 format
  #
  # @param file_path [String] Path to the file
  # @return [Boolean] true if RAR4 format
  def self.rar4?(file_path)
    detect(file_path) == :rar4
  end

  # Get the appropriate reader class for the format
  #
  # Reader implementations are required lazily, only once the matching
  # format has been detected. Formats without a reader here
  # (:gzip, :bzip2, :lzma_alone) yield nil.
  #
  # @param file_path [String] Path to the archive file
  # @return [Class, nil] Reader class or nil if unknown format
  def self.reader_for(file_path)
    case detect(file_path)
    when :xz
      require_relative "formats/xz"
      Omnizip::Formats::Xz
    when :seven_zip
      require_relative "formats/seven_zip/reader"
      Omnizip::Formats::SevenZip::Reader
    when :rar5
      require_relative "formats/rar5/reader"
      Omnizip::Formats::Rar5::Reader
    when :rar4
      require_relative "formats/rar3/reader"
      Omnizip::Formats::Rar3::Reader
    when :zip
      require_relative "formats/zip/reader"
      Omnizip::Formats::Zip::Reader
    end
  end

  # Detect LZMA_Alone format
  #
  # LZMA_Alone format has a 13-byte header:
  # - 1 byte: properties (lc, lp, pb encoded)
  # - 4 bytes: dictionary size (little-endian)
  # - 8 bytes: uncompressed size (little-endian, -1 for unknown)
  #
  # The format has no magic number, so this is a plausibility check only.
  #
  # @param bytes [Array<Integer>] First bytes of file
  # @return [Symbol, nil] :lzma_alone or nil
  def self.detect_lzma_alone(bytes)
    return nil if bytes.size < LZMA_ALONE_HEADER_SIZE

    # props = (pb * 5 + lp) * 9 + lc with lc <= 8, lp <= 4, pb <= 4, so any
    # value up to 224 decodes to in-range lc/lp/pb and anything above is
    # invalid.
    return nil if bytes[0] > LZMA_ALONE_MAX_PROPS

    # Dictionary size: 32-bit little-endian, must be non-zero and at most 1 GiB.
    dict_size = bytes[1, 4].pack("C*").unpack1("V")
    return nil if dict_size.zero? || dict_size > LZMA_ALONE_MAX_DICT_SIZE

    :lzma_alone
  end
  private_class_method :detect_lzma_alone
end
162
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
# Registry for archive format handlers
#
# Maps normalized file extensions (lower-case, with a leading dot) to the
# handler class registered for them (7z, zip, tar, etc.).
class FormatRegistry
  # Register a format handler
  #
  # @param extension [String] File extension (e.g., ".7z", ".zip")
  # @param handler_class [Class] Format handler class
  def self.register(extension, handler_class)
    key = normalize_extension(extension)
    registry.store(key, handler_class)
  end

  # Get format handler for extension
  #
  # @param extension [String] File extension
  # @return [Class, nil] Handler class or nil if not found
  def self.get(extension)
    key = normalize_extension(extension)
    registry[key]
  end

  # Check if format is supported
  #
  # @param extension [String] File extension
  # @return [Boolean] true if supported
  def self.supported?(extension)
    key = normalize_extension(extension)
    registry.include?(key)
  end

  # List all supported formats
  #
  # @return [Array<String>] Supported extensions, sorted
  def self.supported_formats
    registry.each_key.sort
  end

  # Format registry storage (created on first use)
  #
  # @return [Hash] Extension to handler class mapping
  def self.registry
    @registry ||= {}
  end

  # Normalize file extension: stringify, ensure a leading dot, lower-case.
  #
  # @param ext [String] Extension
  # @return [String] Normalized extension
  def self.normalize_extension(ext)
    text = ext.to_s
    dotted = text.start_with?(".") ? text : ".#{text}"
    dotted.downcase
  end

  private_class_method :registry, :normalize_extension
end
59
+ end
File without changes