omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,244 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "filter_base"
20
+
21
+ module Omnizip
22
+ module Filters
23
# BCJ filter for 64-bit ARM (AArch64) executables.
#
# This filter preprocesses ARM64 machine code by rewriting the
# PC-relative immediates of BL and ADRP instructions as absolute
# values, so that repeated references to the same target become
# identical byte sequences. ARM64 instructions are 4 bytes wide and
# are read here as little-endian words.
#
# Only words matching MASK_B_BL / OPCODE_B_BL (top six bits 100101,
# i.e. BL) are treated as branches; a plain B (0x14xxxxxx) does not
# match that mask and passes through unchanged.
class BcjArm64 < FilterBase
  # BL instruction base opcode (name kept for compatibility)
  OPCODE_B_BL = 0x94000000

  # Mask selecting the six opcode bits of a BL instruction
  MASK_B_BL = 0xFC000000

  # ADRP instruction base opcode
  OPCODE_ADRP = 0x90000000

  # ADRP detection mask; it ignores immlo (bits 30:29), so words with
  # a top byte of 0x90, 0xB0, 0xD0 or 0xF0 all match
  MASK_ADRP = 0x9F000000

  # Size of ARM64 instruction (4 bytes)
  INSTRUCTION_SIZE = 4

  # Width mask of the BL word immediate (imm26)
  IMM26_MASK = 0x03FFFFFF

  # Width mask of the ADRP page immediate (immhi:immlo, 21 bits)
  IMM21_MASK = 0x1FFFFF

  # Bits of an ADRP word that are not part of its immediate
  ADRP_KEEP_MASK = 0x9F00001F

  # Encode (preprocess) ARM64 executable data for compression.
  #
  # Every BL immediate has the instruction's word index added to it
  # and every ADRP immediate has the instruction's page index added,
  # both modulo the width of the field.
  #
  # @param data [String] Binary executable data
  # @param position [Integer] Stream position of the first byte
  # @return [String] Encoded binary data (a new string)
  def encode(data, position = 0)
    convert(data, position, 1)
  end

  # Decode (postprocess) ARM64 executable data after decompression.
  #
  # Exact inverse of {#encode} for the same +position+, including
  # positions that are not a multiple of four.
  #
  # @param data [String] Binary executable data
  # @param position [Integer] Stream position of the first byte
  # @return [String] Decoded binary data (a new string)
  def decode(data, position = 0)
    convert(data, position, -1)
  end

  class << self
    # Get metadata about this filter.
    #
    # @return [Hash] Filter metadata
    def metadata
      {
        name: "BCJ-ARM64",
        description: "Branch converter for 64-bit ARM (AArch64) " \
                     "executables",
        architecture: "ARM64 / AArch64",
        alignment: 4,
        endian: "little",
      }
    end
  end

  private

  # Shared conversion loop for encode and decode.
  #
  # The immediates are treated as unsigned fields and adjusted with
  # modular arithmetic. That is equivalent to sign-extending, adding
  # and truncating again, because the sign-extension bits lie above
  # the field and are masked away. Using the same floored word/page
  # index in both directions is what makes decode the exact inverse
  # of encode.
  #
  # Inputs shorter than one instruction are returned as a copy; a
  # trailing partial instruction is left untouched.
  #
  # @param data [String] Binary data (not modified)
  # @param position [Integer] Stream position of the first byte
  # @param direction [Integer] +1 to encode, -1 to decode
  # @return [String] Converted binary copy of +data+
  def convert(data, position, direction)
    return data.dup if data.bytesize < INSTRUCTION_SIZE

    result = data.b
    size = data.bytesize & ~(INSTRUCTION_SIZE - 1)

    (0...size).step(INSTRUCTION_SIZE) do |i|
      instruction = extract_uint32_le(result, i)
      address = position + i

      if (instruction & MASK_B_BL) == OPCODE_B_BL
        # imm26 counts 4-byte words, so adjust by the word index.
        imm = (instruction & IMM26_MASK) + (direction * (address >> 2))
        write_uint32_le(result, i, OPCODE_B_BL | (imm & IMM26_MASK))
      elsif (instruction & MASK_ADRP) == OPCODE_ADRP
        # immhi is bits [23:5], immlo is bits [30:29]; together they
        # count 4 KiB pages, so adjust by the page index.
        imm = (((instruction >> 5) & 0x7FFFF) << 2) |
          ((instruction >> 29) & 0x3)
        imm = (imm + (direction * (address >> 12))) & IMM21_MASK
        rebuilt = (instruction & ADRP_KEEP_MASK) |
          ((imm & 0x3) << 29) |
          ((imm >> 2) << 5)
        write_uint32_le(result, i, rebuilt)
      end
    end

    result
  end

  # Extract an unsigned 32-bit little-endian integer from data.
  #
  # @param data [String] Binary data
  # @param offset [Integer] Starting position
  # @return [Integer] Unsigned 32-bit integer
  def extract_uint32_le(data, offset)
    data.getbyte(offset) |
      (data.getbyte(offset + 1) << 8) |
      (data.getbyte(offset + 2) << 16) |
      (data.getbyte(offset + 3) << 24)
  end

  # Write an unsigned 32-bit little-endian integer to data.
  #
  # @param data [String] Binary data (modified in place)
  # @param offset [Integer] Starting position
  # @param value [Integer] Integer to write; only the low 32 bits
  #   are stored
  # @return [void]
  def write_uint32_le(data, offset, value)
    INSTRUCTION_SIZE.times do |k|
      data.setbyte(offset + k, (value >> (8 * k)) & 0xFF)
    end
  end
end
243
+ end
244
+ end
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "filter_base"
20
+
21
+ module Omnizip
22
+ module Filters
23
# BCJ filter for IA-64 (Itanium) executables.
#
# This filter preprocesses Itanium machine code by converting
# relative addresses in branch instructions. Data is processed in
# 16-byte bundles: the first byte selects, through TEMPLATE_MASKS,
# which slots are inspected, and each inspected slot is read as five
# consecutive bytes following that first byte.
#
# NOTE(review): the byte-aligned 5-byte slot layout and the use of
# the TEMPLATE_MASKS lookup as a slot bitmask look different from
# the 7-Zip reference IA-64 converter (41-bit slots packed after a
# 5-bit template). Confirm interoperability before relying on this
# filter for archives produced by other tools.
class BcjIa64 < FilterBase
  # Size of IA-64 instruction bundle (16 bytes, little-endian)
  BUNDLE_SIZE = 16

  # Packed lookup table indexed by (first bundle byte & 0x1E); the
  # two bits found at that shift form the slot mask for the bundle
  TEMPLATE_MASKS = 0x334B0000

  # Number of bytes read for one instruction slot
  SLOT_SIZE = 5

  # Bit position of the opcode within a slot value
  OPCODE_SHIFT = 37

  # Opcode value that marks a slot as a convertible branch
  BRANCH_OPCODE = 5

  # Bit position of the rewritten target field within a slot value
  TARGET_SHIFT = 13

  # Width mask of the rewritten target field (bits 13..36). The field
  # must stop below OPCODE_SHIFT: rewriting bit 37 could change the
  # opcode, after which decode would no longer recognise the slot.
  TARGET_MASK = 0xFFFFFF

  # Encode (preprocess) IA-64 executable data for compression.
  #
  # Scans 16-byte instruction bundles for branch slots and adds the
  # bundle's program counter to each target field.
  #
  # @param data [String] Binary executable data
  # @param position [Integer] Current stream position
  # @return [String] Encoded binary data (a new string)
  def encode(data, position = 0)
    convert(data, position, true)
  end

  # Decode (postprocess) IA-64 executable data after decompression.
  #
  # Reverses {#encode} by subtracting the bundle's program counter
  # from each target field.
  #
  # @param data [String] Binary executable data
  # @param position [Integer] Current stream position
  # @return [String] Decoded binary data (a new string)
  def decode(data, position = 0)
    convert(data, position, false)
  end

  class << self
    # Get metadata about this filter.
    #
    # @return [Hash] Filter metadata
    def metadata
      {
        name: "BCJ-IA64",
        description: "Branch converter for IA-64 (Itanium) " \
                     "executables",
        architecture: "IA-64 / Itanium",
        alignment: 16,
        endian: "little",
        complexity: "high",
      }
    end
  end

  private

  # Shared bundle loop for encode and decode.
  #
  # Inputs shorter than one bundle are returned as a copy; a trailing
  # partial bundle is left untouched. The counter handed to the slots
  # starts at (position / 16) * 2 and is advanced by 2 before each
  # bundle is processed.
  #
  # @param data [String] Binary data (not modified)
  # @param position [Integer] Current stream position
  # @param encoding [Boolean] True for encoding, false for decoding
  # @return [String] Converted binary copy of +data+
  def convert(data, position, encoding)
    return data.dup if data.bytesize < BUNDLE_SIZE

    result = data.b
    size = data.bytesize & ~(BUNDLE_SIZE - 1)
    counter = (position >> 4) << 1

    (0...size).step(BUNDLE_SIZE) do |bundle|
      counter += 2
      template = result.getbyte(bundle) & 0x1E
      mask = (TEMPLATE_MASKS >> template) & 3
      next if mask.zero?

      process_bundle_slots(result, bundle, mask, counter, encoding)
    end

    result
  end

  # Process instruction slots within a bundle.
  #
  # Bit n of +mask+ selects the slot starting at byte 1 + 5 * n of
  # the bundle. At most three slots are visited.
  #
  # @param data [String] Binary data (modified in place)
  # @param offset [Integer] Bundle offset
  # @param mask [Integer] Slot mask
  # @param pc [Integer] Program counter
  # @param encoding [Boolean] True for encoding, false for decoding
  # @return [void]
  # rubocop:disable Naming/MethodParameterName
  def process_bundle_slots(data, offset, mask, pc, encoding)
    # rubocop:enable Naming/MethodParameterName
    3.times do |slot|
      break if mask.zero?

      if mask.anybits?(1)
        process_slot(data, offset + 1 + (slot * SLOT_SIZE), pc, encoding)
      end

      mask >>= 1
    end
  end

  # Process a single instruction slot.
  #
  # Reads five bytes as a little-endian 40-bit value. If the opcode
  # field equals BRANCH_OPCODE, the 24-bit target field (bits 13..36)
  # is adjusted by +pc+ modulo 2**24 and the slot is written back.
  # All other bits, including the opcode, are preserved, so a slot
  # converted by encode is always recognised again by decode.
  #
  # @param data [String] Binary data (modified in place)
  # @param offset [Integer] Offset of the slot's first byte
  # @param pc [Integer] Program counter
  # @param encoding [Boolean] True for encoding, false for decoding
  # @return [void]
  # rubocop:disable Naming/MethodParameterName
  def process_slot(data, offset, pc, encoding)
    # rubocop:enable Naming/MethodParameterName
    instruction = 0
    SLOT_SIZE.times do |k|
      instruction |= data.getbyte(offset + k) << (8 * k)
    end

    opcode = (instruction >> OPCODE_SHIFT) & 0xF
    return unless opcode == BRANCH_OPCODE

    target = (instruction >> TARGET_SHIFT) & TARGET_MASK
    target = (encoding ? target + pc : target - pc) & TARGET_MASK

    instruction = (instruction & ~(TARGET_MASK << TARGET_SHIFT)) |
      (target << TARGET_SHIFT)

    SLOT_SIZE.times do |k|
      data.setbyte(offset + k, (instruction >> (8 * k)) & 0xFF)
    end
  end
end
195
+ end
196
+ end
@@ -0,0 +1,190 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "filter_base"
20
+
21
+ module Omnizip
22
+ module Filters
23
# BCJ filter for PowerPC executables.
#
# This filter preprocesses PowerPC machine code by converting the
# relative displacement of branch-and-link instructions to an
# absolute address. PowerPC instructions are 4 bytes wide and are
# read here as big-endian words.
#
# Only words matching OPCODE_MASK / OPCODE_PATTERN are converted:
# top six bits 010010 and low two bits 01 (link bit set, absolute
# bit clear). Other branch forms pass through unchanged.
class BcjPpc < FilterBase
  # PPC branch instruction base (0x48000000)
  OPCODE_BASE = 0x48000000

  # Mask selecting the opcode bits and the two low flag bits
  OPCODE_MASK = 0xFC000003

  # Expected pattern: branch with link bit set (0x48000001)
  OPCODE_PATTERN = 0x48000001

  # Size of PPC instruction (4 bytes, big-endian)
  INSTRUCTION_SIZE = 4

  # Displacement field: 24 bits stored in bits 2..25 of the word,
  # i.e. a byte displacement that is always a multiple of four
  OFFSET_MASK = 0x03FFFFFC

  # Encode (preprocess) PowerPC executable data for compression.
  #
  # Adds the instruction's (4-byte floored) stream address to the
  # displacement field of every matching branch, modulo the field
  # width.
  #
  # @param data [String] Binary executable data
  # @param position [Integer] Stream position of the first byte
  # @return [String] Encoded binary data (a new string)
  def encode(data, position = 0)
    convert(data, position, 1)
  end

  # Decode (postprocess) PowerPC executable data after decompression.
  #
  # Exact inverse of {#encode} for the same +position+, including
  # positions that are not a multiple of four.
  #
  # @param data [String] Binary executable data
  # @param position [Integer] Stream position of the first byte
  # @return [String] Decoded binary data (a new string)
  def decode(data, position = 0)
    convert(data, position, -1)
  end

  class << self
    # Get metadata about this filter.
    #
    # @return [Hash] Filter metadata
    def metadata
      {
        name: "BCJ-PPC",
        description: "Branch converter for PowerPC executables",
        architecture: "PowerPC",
        alignment: 4,
        endian: "big",
      }
    end
  end

  private

  # Shared conversion loop for encode and decode.
  #
  # The displacement is handled as an unsigned field with modular
  # arithmetic; sign extension is unnecessary because any extension
  # bits lie outside OFFSET_MASK and would be masked away again.
  # The address is floored to a 4-byte boundary in both directions
  # so that decode subtracts exactly what encode added.
  #
  # Inputs shorter than one instruction are returned as a copy; a
  # trailing partial instruction is left untouched.
  #
  # @param data [String] Binary data (not modified)
  # @param position [Integer] Stream position of the first byte
  # @param direction [Integer] +1 to encode, -1 to decode
  # @return [String] Converted binary copy of +data+
  def convert(data, position, direction)
    return data.dup if data.bytesize < INSTRUCTION_SIZE

    result = data.b
    size = data.bytesize & ~(INSTRUCTION_SIZE - 1)

    (0...size).step(INSTRUCTION_SIZE) do |i|
      instruction = extract_uint32_be(result, i)
      next unless (instruction & OPCODE_MASK) == OPCODE_PATTERN

      base = (position + i) & ~(INSTRUCTION_SIZE - 1)
      target = (instruction & OFFSET_MASK) + (direction * base)
      write_uint32_be(result, i,
                      OPCODE_BASE | (target & OFFSET_MASK) | 1)
    end

    result
  end

  # Extract an unsigned 32-bit big-endian integer from data.
  #
  # @param data [String] Binary data
  # @param offset [Integer] Starting position
  # @return [Integer] Unsigned 32-bit integer
  def extract_uint32_be(data, offset)
    (data.getbyte(offset) << 24) |
      (data.getbyte(offset + 1) << 16) |
      (data.getbyte(offset + 2) << 8) |
      data.getbyte(offset + 3)
  end

  # Write an unsigned 32-bit big-endian integer to data.
  #
  # @param data [String] Binary data (modified in place)
  # @param offset [Integer] Starting position
  # @param value [Integer] Integer to write; only the low 32 bits
  #   are stored
  # @return [void]
  def write_uint32_be(data, offset, value)
    INSTRUCTION_SIZE.times do |k|
      shift = 8 * (INSTRUCTION_SIZE - 1 - k)
      data.setbyte(offset + k, (value >> shift) & 0xFF)
    end
  end
end
189
+ end
190
+ end