omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2025 Ribose Inc.
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a
7
+ # copy of this software and associated documentation files (the "Software"),
8
+ # to deal in the Software without restriction, including without limitation
9
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
+ # and/or sell copies of the Software, and to permit persons to whom the
11
+ # Software is furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22
+ # DEALINGS IN THE SOFTWARE.
23
+
24
+ module Omnizip
25
+ # Abstract base class for all preprocessing filters
26
+ #
27
+ # Filters are reversible transformations applied to data before
28
+ # compression to improve compression ratios. This class defines the
29
+ # interface that all filter implementations must follow.
30
+ #
31
+ # The key innovation is format-aware ID resolution: different formats
32
+ # (7z, XZ) use different IDs for the same filter. This class provides the
33
+ # id_for_format(format) method to handle this mapping.
34
+ #
35
+ # @abstract Subclasses must implement id_for_format, encode, decode, metadata
36
+ #
37
+ # @example Create a custom filter
38
+ # class MyFilter < Filter
39
+ # def initialize(architecture:)
40
+ # super(architecture: architecture, name: "MyFilter")
41
+ # end
42
+ #
43
+ # def id_for_format(format)
44
+ # format == :xz ? 0x04 : 0x03
45
+ # end
46
+ #
47
+ # def encode(data, position = 0)
48
+ # # encoding logic
49
+ # end
50
+ #
51
+ # def decode(data, position = 0)
52
+ # # decoding logic
53
+ # end
54
+ #
55
+ # def self.metadata
56
+ # { name: "MyFilter", description: "..." }
57
+ # end
58
+ # end
59
class Filter
  # @!attribute [r] architecture
  #   @return [Symbol] Architecture identifier passed to the constructor
  #     (e.g. :x86, :arm, :armthumb, :arm64, :powerpc, :ia64, :sparc)
  # @!attribute [r] name
  #   @return [String] Human-readable filter name
  attr_reader :architecture, :name

  # Build a filter bound to a single architecture.
  #
  # @param architecture [Symbol] Target architecture
  # @param name [String] Human-readable name; "Unknown" when omitted
  def initialize(architecture:, name: "Unknown")
    @architecture, @name = architecture, name
  end

  # Resolve the numeric ID this filter uses inside a given container format.
  #
  # The same filter is identified by different numbers in different
  # formats, so every concrete filter has to supply its own mapping.
  #
  # @param format [Symbol] Format identifier (:seven_zip, :xz)
  # @return [Integer] Format-specific filter ID
  # @raise [NotImplementedError] Always, unless overridden by a subclass
  #
  # @example
  #   bcj.id_for_format(:xz)        # => 0x04
  #   bcj.id_for_format(:seven_zip) # => 0x03030103
  def id_for_format(format)
    message = "#{self.class} must implement #id_for_format(format)"
    raise NotImplementedError, message
  end

  # Preprocess data before compression.
  #
  # Implementations must be reversible: decode(encode(data)) == data.
  #
  # @param data [String] Binary data to encode
  # @param position [Integer] Current stream position (default: 0)
  # @return [String] Encoded binary data
  # @raise [NotImplementedError] Always, unless overridden by a subclass
  def encode(data, position = 0)
    message = "#{self.class} must implement #encode(data, position)"
    raise NotImplementedError, message
  end

  # Postprocess data after decompression, undoing #encode.
  #
  # @param data [String] Binary data to decode
  # @param position [Integer] Current stream position (default: 0)
  # @return [String] Decoded binary data
  # @raise [NotImplementedError] Always, unless overridden by a subclass
  def decode(data, position = 0)
    message = "#{self.class} must implement #decode(data, position)"
    raise NotImplementedError, message
  end

  # Describe the filter class.
  #
  # @return [Hash] Filter metadata (name, description, supported
  #   architectures) as defined by the concrete subclass
  # @raise [NotImplementedError] Always, unless overridden by a subclass
  #
  # @example
  #   Omnizip::Filters::BCJ.metadata
  #   # => { name: "BCJ", description: "...", ... }
  def self.metadata
    message = "#{self} must implement .metadata"
    raise NotImplementedError, message
  end
end
139
+ end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ module Omnizip
20
+ # Pipeline for chaining multiple filters together.
21
+ #
22
+ # Filters are applied in sequence during encoding, and in reverse
23
+ # order during decoding. The caller-supplied stream position is passed
24
+ # unchanged to every filter in the pipeline.
25
class FilterPipeline
  # @return [Array] Filters in the order they were added
  attr_reader :filters

  # Create a pipeline that contains no filters yet.
  def initialize
    @filters = []
    @position = 0
  end

  # Append a filter to the end of the pipeline.
  #
  # Encoding runs filters first-to-last; decoding runs them last-to-first.
  #
  # @param filter [#encode, #decode] Filter instance to add
  # @return [self] The pipeline, so calls can be chained
  def add_filter(filter)
    @filters.push(filter)
    self
  end

  # Tell whether the pipeline is still without filters.
  #
  # @return [Boolean] True when no filter has been added
  def empty?
    @filters.length.zero?
  end

  # Count the filters in the pipeline.
  #
  # @return [Integer] Number of filters
  def size
    @filters.length
  end

  # Run the data through every filter's #encode, first filter first.
  #
  # Each filter receives the output of the previous one together with the
  # same +position+ value. An empty pipeline returns a copy of the input.
  #
  # @param data [String] Binary data to encode
  # @param position [Integer] Current stream position
  # @return [String] Encoded binary data
  def encode(data, position = 0)
    return data.dup if @filters.empty?

    @filters.inject(data) do |buffer, stage|
      stage.encode(buffer, position)
    end
  end

  # Run the data through every filter's #decode, last filter first.
  #
  # Each filter receives the output of the previous one together with the
  # same +position+ value. An empty pipeline returns a copy of the input.
  #
  # @param data [String] Binary data to decode
  # @param position [Integer] Current stream position
  # @return [String] Decoded binary data
  def decode(data, position = 0)
    return data.dup if @filters.empty?

    @filters.reverse_each.inject(data) do |buffer, stage|
      stage.decode(buffer, position)
    end
  end

  # Remove every filter and reset the stored position to zero.
  #
  # @return [void]
  def clear
    @filters.clear
    @position = 0
  end
end
108
+ end
@@ -0,0 +1,166 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ module Omnizip
20
+ # Registry for managing filter classes.
21
+ #
22
+ # This class provides a centralized registry for preprocessing filters,
23
+ # allowing filters to self-register and be retrieved by name.
24
+ # It implements a plugin-style architecture for extensibility.
25
class FilterRegistry
  # name (Symbol) => Class (plain registration) or
  # { class: Class, formats: Array<Symbol> } (format-aware registration)
  @filters = {}

  class << self
    # Register a filter class under a name, without format restrictions.
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @param klass [Class] The filter class to register
    # @raise [ArgumentError] If name or klass is nil
    # @return [void]
    def register(name, klass)
      raise ArgumentError, "Filter name cannot be nil" if name.nil?
      raise ArgumentError, "Filter class cannot be nil" if klass.nil?

      @filters[name.to_sym] = klass
    end

    # Retrieve a filter class by name.
    #
    # Works for both plain (Class) and format-aware (Hash) registrations.
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @raise [UnknownFilterError] If filter is not registered
    # @return [Class] The registered filter class
    def get(name)
      entry = @filters[name.to_sym]
      unless entry
        raise UnknownFilterError,
              "Unknown filter: #{name}. " \
              "Available: #{available.join(', ')}"
      end

      entry_class(entry)
    end

    # Check if a filter is registered.
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @return [Boolean] True if filter is registered, false otherwise
    def registered?(name)
      @filters.key?(name.to_sym)
    end

    # Get list of all registered filter names.
    #
    # @return [Array<Symbol>] Array of registered filter names
    def available
      @filters.keys
    end

    # Reset the registry (primarily for testing).
    #
    # @return [void]
    def reset!
      @filters.clear
    end

    # Register a filter class together with the formats it supports.
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @param filter_class [Class] The filter class to register
    # @param formats [Array<Symbol>] Supported formats (default: [:xz,
    #   :seven_zip])
    # @raise [ArgumentError] If name or filter_class is nil
    # @return [void]
    def register_with_formats(name, filter_class, formats: %i[xz seven_zip])
      raise ArgumentError, "Filter name cannot be nil" if name.nil?
      raise ArgumentError, "Filter class cannot be nil" if filter_class.nil?

      @filters[name.to_sym] = {
        class: filter_class,
        formats: formats,
      }
    end

    # Build a new filter instance after checking format support.
    #
    # Filters registered through {.register} carry no format list and are
    # treated as supporting every format, matching {.supports_format?} and
    # {.filters_for_format}. Any extra arguments are forwarded to the
    # filter's constructor, so filters that need configuration (for
    # example an +architecture:+ keyword) can be built here as well.
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @param format [Symbol] Format identifier (:xz, :seven_zip)
    # @param args [Array] Positional arguments for the filter constructor
    # @param options [Hash] Keyword arguments for the filter constructor
    # @raise [KeyError] If filter is not registered
    # @raise [ArgumentError] If filter doesn't support the format
    # @return [Object] New filter instance
    def get_for_format(name, format, *args, **options, &block)
      entry = @filters[name.to_sym]
      raise KeyError, "Filter not found: #{name}" unless entry

      unless entry_supports?(entry, format)
        raise ArgumentError,
              "Filter #{name} not supported for format #{format}"
      end

      entry_class(entry).new(*args, **options, &block)
    end

    # Check if filter supports specific format.
    #
    # @param name [Symbol, String] The name identifier for the filter
    # @param format [Symbol] Format identifier
    # @return [Boolean] True if filter is registered and supports the format
    def supports_format?(name, format)
      entry = @filters[name.to_sym]
      return false unless entry

      entry_supports?(entry, format)
    end

    # Get all filters supporting a specific format.
    #
    # @param format [Symbol] Format identifier
    # @return [Array<Symbol>] Filter names supporting the format
    def filters_for_format(format)
      @filters.select { |_, entry| entry_supports?(entry, format) }.keys
    end

    private

    # Extract the filter class from either registration style.
    def entry_class(entry)
      entry.is_a?(Hash) ? entry[:class] : entry
    end

    # Decide format support for one registry entry. Plain Class entries
    # have no format list and count as supporting everything; a Hash entry
    # whose :formats is nil supports nothing.
    def entry_supports?(entry, format)
      return true unless entry.is_a?(Hash)

      formats = entry[:formats]
      !formats.nil? && formats.include?(format)
    end
  end
end
163
+
164
+ # Error raised when an unknown filter is requested
165
# Raised when a lookup names a filter that is not registered.
class UnknownFilterError < StandardError
end
166
+ end
@@ -0,0 +1,279 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2025 Ribose Inc.
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a
7
+ # copy of this software and associated documentation files (the "Software"),
8
+ # to deal in the Software without restriction, including without limitation
9
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
+ # and/or sell copies of the Software, and to permit persons to whom the
11
+ # Software is furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22
+ # DEALINGS IN THE SOFTWARE.
23
+
24
+ require_relative "../filter"
25
+
26
+ module Omnizip
27
+ module Filters
28
+ # Unified BCJ (Branch/Call/Jump) filter for multiple architectures
29
+ #
30
+ # This filter preprocesses executable code by converting relative
31
+ # addresses in branch/call instructions to absolute addresses.
32
+ # The transformation is reversible and improves compression ratio.
33
+ #
34
+ # Supports x86, ARM, ARM Thumb, ARM64, PowerPC, IA64, SPARC architectures.
35
+ # Automatically returns correct filter ID for 7z or XZ format.
36
+ #
37
+ # @example Create x86 BCJ filter
38
+ # bcj = Omnizip::Filters::BCJ.new(architecture: :x86)
39
+ # bcj.id_for_format(:xz) # => 0x04
40
+ # bcj.id_for_format(:seven_zip) # => 0x03030103
41
class BCJ < Filter
  # Per-architecture parameters.
  #
  # * +:opcodes+          - byte values that mark a candidate instruction
  # * +:address_size+     - operand width in bytes (little-endian)
  # * +:instruction_size+ - bytes skipped after a matched opcode
  # * +:xz_id+            - ID returned for :xz (nil = unsupported)
  # * +:seven_zip_id+     - ID returned for :seven_zip
  CONFIG = {
    x86: {
      opcodes: [0xE8, 0xE9], # CALL, JMP
      address_size: 4,
      instruction_size: 5,
      xz_id: 0x04,
      seven_zip_id: 0x03030103,
    },
    arm: {
      opcodes: [0x0A, 0x0B], # ARM BL/B conditional
      address_size: 4,
      instruction_size: 4,
      xz_id: 0x07,
      seven_zip_id: 0x03030501,
    },
    armthumb: {
      opcodes: [0xE8, 0xF0, 0xF1], # ARM Thumb BL/B conditional
      address_size: 4,
      instruction_size: 4,
      xz_id: 0x08,
      seven_zip_id: 0x03030701,
    },
    arm64: {
      opcodes: [0x00], # ARM64 BL
      address_size: 4,
      instruction_size: 4,
      # NOTE(review): newer XZ format revisions appear to assign 0x0A to
      # ARM64 - confirm against the spec before enabling it here.
      xz_id: nil, # Not yet in XZ
      seven_zip_id: 0x03030601,
    },
    powerpc: {
      opcodes: [0x48, 0x18], # PowerPC branch instructions
      address_size: 4,
      instruction_size: 4,
      xz_id: 0x05,
      seven_zip_id: 0x03030205,
    },
    ia64: {
      opcodes: [0x04, 0x05, 0x06, 0x07, 0x08], # IA64 branches
      address_size: 4,
      instruction_size: 4,
      xz_id: 0x06,
      seven_zip_id: 0x03030401,
    },
    sparc: {
      opcodes: [0x04, 0x06, 0x07], # SPARC call/branch
      address_size: 4,
      instruction_size: 4,
      xz_id: 0x09,
      seven_zip_id: 0x03030805,
    },
  }.freeze

  # Initialize BCJ filter for specific architecture
  #
  # The architecture itself is stored (and exposed through
  # +#architecture+) by the Filter superclass.
  #
  # @param architecture [Symbol] Target architecture (:x86, :arm,
  #   :armthumb, :arm64, :powerpc, :ia64, :sparc)
  # @raise [ArgumentError] If architecture is not supported
  def initialize(architecture:)
    unless CONFIG.key?(architecture)
      raise ArgumentError, "Unsupported BCJ architecture: #{architecture}. " \
                           "Supported: #{CONFIG.keys.join(', ')}"
    end

    @config = CONFIG[architecture]
    super(architecture: architecture, name: "BCJ-#{architecture.to_s.upcase}")
  end

  # Get filter ID for specific format
  #
  # @param format [Symbol] Format identifier (:seven_zip, :xz)
  # @return [Integer] Format-specific filter ID
  # @raise [ArgumentError] If format is not supported
  # @raise [NotImplementedError] If architecture not supported in format
  def id_for_format(format)
    case format
    when :seven_zip
      @config[:seven_zip_id]
    when :xz
      id = @config[:xz_id]
      if id.nil?
        raise NotImplementedError,
              "#{@architecture} BCJ not yet supported in XZ format"
      end

      id
    else
      raise ArgumentError,
            "Unknown format: #{format}. Supported: :seven_zip, :xz"
    end
  end

  # Encode (preprocess) data for compression
  #
  # Scans for the configured opcodes and, where the operand that follows
  # looks like a small relative offset, replaces it with
  # offset + position + index + instruction_size.
  #
  # @param data [String] Binary executable data
  # @param position [Integer] Current stream position (default: 0)
  # @return [String] Encoded binary data (the input is not modified)
  def encode(data, position = 0)
    scan_branches(data) do |buffer, index|
      relative = extract_address(buffer, index + 1)
      next unless valid_relative_address?(relative)

      absolute = relative + position + index + @config[:instruction_size]
      write_address(buffer, index + 1, absolute)
    end
  end

  # Decode (postprocess) data after decompression
  #
  # Scans for the configured opcodes and subtracts
  # position + index + instruction_size from the operand that follows,
  # writing it back when the result looks like a small relative offset.
  #
  # NOTE(review): the validity check runs on the recomputed offset, so an
  # operand that #encode left untouched can still be rewritten here (e.g.
  # x86 bytes E8 00 00 00 01 at position 0). Round-tripping is therefore
  # not guaranteed for arbitrary input; fixing it changes the encoded
  # output and needs a format-level decision.
  #
  # @param data [String] Binary executable data
  # @param position [Integer] Current stream position (default: 0)
  # @return [String] Decoded binary data (the input is not modified)
  def decode(data, position = 0)
    scan_branches(data) do |buffer, index|
      absolute = extract_address(buffer, index + 1)
      relative = absolute - (position + index + @config[:instruction_size])

      write_address(buffer, index + 1, relative) if valid_relative_address?(relative)
    end
  end

  class << self
    # Get metadata about this filter
    #
    # @return [Hash] Filter metadata; :architectures carries a display
    #   label for every key of CONFIG
    def metadata
      {
        name: "BCJ",
        description: "Branch/Call/Jump converter for executable files",
        supported_architectures: CONFIG.keys,
        architectures: {
          x86: "x86/x86-64",
          arm: "ARM 32-bit",
          armthumb: "ARM Thumb",
          arm64: "ARM 64-bit",
          powerpc: "PowerPC",
          ia64: "IA-64 (Itanium)",
          sparc: "SPARC",
        },
      }
    end
  end

  private

  # Walk a binary copy of +data+ and yield (buffer, index) for every
  # position that holds one of the configured opcodes.
  #
  # @param data [String] Input data
  # @yieldparam buffer [String] Mutable binary copy being transformed
  # @yieldparam index [Integer] Offset of the matched opcode
  # @return [String] The transformed copy
  def scan_branches(data)
    step = @config[:instruction_size]
    return data.dup if data.bytesize < step

    buffer = data.b
    # The operand occupies address_size bytes starting right after the
    # opcode. Stop early enough that it always lies inside the buffer;
    # otherwise the write-back would index past the end of the string.
    last_index = data.bytesize - [step, 1 + @config[:address_size]].max
    index = 0

    while index <= last_index
      if @config[:opcodes].include?(buffer.getbyte(index))
        yield buffer, index
        index += step
      else
        index += 1
      end
    end

    buffer
  end

  # Extract address from data at offset (little-endian)
  #
  # @param data [String] Binary data
  # @param offset [Integer] Starting position
  # @return [Integer] Address value, interpreted as a signed integer of
  #   address_size bytes
  def extract_address(data, offset)
    width = @config[:address_size]
    raw = data.byteslice(offset, width).bytes.each_with_index.sum do |byte, i|
      byte << (8 * i)
    end

    # Two's complement: only values with the top bit set are negative
    sign_bit = 1 << ((8 * width) - 1)
    raw >= sign_bit ? raw - (sign_bit << 1) : raw
  end

  # Write address to data at offset (little-endian)
  #
  # Only the low address_size bytes of +value+ are stored, so negative
  # and overflowing values wrap.
  #
  # @param data [String] Binary data (modified in place)
  # @param offset [Integer] Starting position
  # @param value [Integer] Address value to write
  # @return [void]
  def write_address(data, offset, value)
    @config[:address_size].times do |i|
      data.setbyte(offset + i, value & 0xFF)
      value >>= 8
    end
  end

  # Check if address is a valid relative address
  #
  # Valid relative addresses have high byte of 0x00 or 0xFF,
  # indicating small positive or negative offsets.
  #
  # @param value [Integer] Address value to check
  # @return [Boolean] True if valid relative address
  def valid_relative_address?(value)
    unsigned = value & 0xFFFFFFFF
    high_byte = (unsigned >> 24) & 0xFF
    [0x00, 0xFF].include?(high_byte)
  end
end
278
+ end
279
+ end