omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ module Omnizip
20
+ module Filters
21
+ module Bcj2Constants
22
+ # Shared BCJ2 filter constants (mixed into Bcj2Decoder via `include`). NUM_STREAMS: how many separate streams BCJ2 data is split across.
23
+ NUM_STREAMS = 4
24
+
25
+ # Zero-based index of each of the NUM_STREAMS streams
26
+ STREAM_MAIN = 0 # Main data stream (bytes that are not converted addresses)
27
+ STREAM_CALL = 1 # Addresses belonging to CALL (0xE8) instructions
28
+ STREAM_JUMP = 2 # Addresses belonging to JUMP (0xE9) instructions
29
+ STREAM_RC = 3 # Range coder data stream
30
+
31
+ # x86 opcode bytes whose address operands BCJ2 handles
32
+ OPCODE_CALL = 0xE8 # Opcode byte of the CALL instruction
33
+ OPCODE_JUMP = 0xE9 # Opcode byte of the JUMP instruction
34
+
35
+ # Size in bytes of an x86 address operand (4 bytes, little-endian)
36
+ ADDRESS_SIZE = 4
37
+
38
+ # Range coder constants
39
+ TOP_VALUE = 1 << 24 # Range normalization threshold (2**24 = 16_777_216)
40
+ BIT_MODEL_TOTAL_BITS = 11 # Bit width of the probability scale
41
+ BIT_MODEL_TOTAL = 1 << BIT_MODEL_TOTAL_BITS # 2048: full probability scale
42
+ MOVE_BITS = 5 # Shift used when updating a probability
43
+
44
+ # Total number of probability models: 2 + 256 = 258
45
+ # - 2 for general cases (not E8/E9, or 0F8x pattern) -- NOTE(review): wording is ambiguous; confirm against how the coder indexes the probability array
46
+ # - 256 for byte-specific models (one per possible byte value) when processing E8
47
+ NUM_PROBS = 2 + 256
48
+
49
+ # Starting value for a probability model: half of BIT_MODEL_TOTAL (1024), i.e. 50%
50
+ INITIAL_PROB = BIT_MODEL_TOTAL >> 1
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "constants"
20
+ require_relative "stream_data"
21
+
22
module Omnizip
  module Filters
    # BCJ2 decoder - reconstructs original data from 4 streams.
    #
    # Combines:
    # - Main stream (opcodes and all non-converted bytes)
    # - Call stream (absolute targets of converted CALL/E8 instructions)
    # - Jump stream (absolute targets of converted JUMP/E9 and Jcc
    #   instructions)
    # - RC stream (range coder data: one adaptive bit per branch opcode
    #   telling whether its operand was converted)
    #
    # Probability slots follow the layout described by
    # Bcj2Constants::NUM_PROBS and used by the 7-Zip BCJ2 codec:
    # slot 0 for conditional jumps (0F 80..8F), slot 1 for E9 and
    # slots 2..257 for E8, selected by the byte preceding the opcode.
    class Bcj2Decoder
      include Bcj2Constants

      # First byte of a two-byte conditional near jump (0F 80..8F).
      JCC_PREFIX = 0x0F

      # Mask emulating the 32-bit unsigned registers of the reference
      # range decoder.
      UINT32_MASK = 0xFFFFFFFF

      attr_reader :ip

      # Initialize decoder.
      #
      # @param streams [Bcj2StreamData] The 4 input streams
      # @param position [Integer] Starting instruction pointer
      def initialize(streams, position = 0)
        @streams = streams
        @ip = position
        @range = 0
        @code = 0
        @probs = Array.new(NUM_PROBS, INITIAL_PROB)
        # Last byte emitted to the output; selects the E8 model and
        # detects the 0F prefix of conditional jumps.
        @prev_byte = 0

        # Stream positions
        @main_pos = 0
        @call_pos = 0
        @jump_pos = 0
        @rc_pos = 0
      end

      # Decode 4 streams back to original data.
      #
      # Every byte of the main stream is copied to the output. When the
      # byte is a branch opcode (E8, E9, or the second byte of 0F 8x) one
      # bit is read from the range coder; a set bit means a 4-byte absolute
      # target follows in the call/jump stream, which is converted back to
      # a relative operand and appended in little-endian order.
      #
      # @return [String] Decoded binary data
      def decode
        result = String.new(encoding: Encoding::BINARY)
        init_range_decoder

        main = @streams.main
        main_size = main.bytesize

        while @main_pos < main_size
          byte = main.getbyte(@main_pos)
          @main_pos += 1
          result << byte.chr(Encoding::BINARY)

          prob_index = get_prob_index(byte)
          if prob_index && read_bit(prob_index)
            # Converted branch: read_address relies on @ip still pointing
            # at the opcode, so advance only afterwards.
            addr = read_address(byte)
            result << encode_int32_le(addr)
            @ip += 5
            # The last byte written is the high byte of the operand.
            @prev_byte = (addr >> 24) & 0xFF
          else
            @ip += 1
            @prev_byte = byte
          end
        end

        result
      end

      private

      # Initialize range decoder by reading first 5 bytes from RC stream.
      #
      # The code register is kept to 32 bits, so the first byte is shifted
      # out exactly as in the reference implementation. Fewer than five
      # available bytes are tolerated; the register then holds whatever
      # was read.
      #
      # @return [void]
      def init_range_decoder
        @range = UINT32_MASK
        @code = 0

        5.times do
          break if @rc_pos >= @streams.rc.bytesize

          @code = ((@code << 8) | @streams.rc.getbyte(@rc_pos)) & UINT32_MASK
          @rc_pos += 1
        end
      end

      # Read a single bit from range coder.
      #
      # @param prob_index [Integer] Probability model index
      # @return [Boolean] Decoded bit (true = 1, false = 0)
      def read_bit(prob_index) # rubocop:disable Naming/PredicateMethod
        normalize_range

        prob = @probs[prob_index]
        bound = (@range >> BIT_MODEL_TOTAL_BITS) * prob

        if @code < bound
          # Bit is 0: shrink range to the lower part, raise P(0)
          @range = bound
          @probs[prob_index] += (BIT_MODEL_TOTAL - prob) >> MOVE_BITS
          false
        else
          # Bit is 1: keep the upper part, lower P(0)
          @range -= bound
          @code -= bound
          @probs[prob_index] -= prob >> MOVE_BITS
          true
        end
      end

      # Normalize range decoder if needed.
      #
      # Shifts in one RC byte at a time while the range is below TOP_VALUE.
      # Past the end of the RC stream zero bytes are shifted in.
      #
      # @return [void]
      def normalize_range
        rc = @streams.rc
        rc_size = rc.bytesize

        while @range < TOP_VALUE
          @range = (@range << 8) & UINT32_MASK
          next_byte = 0
          if @rc_pos < rc_size
            next_byte = rc.getbyte(@rc_pos)
            @rc_pos += 1
          end
          @code = ((@code << 8) | next_byte) & UINT32_MASK
        end
      end

      # Get probability model index for a byte.
      #
      # @param byte [Integer] Byte just copied from the main stream
      # @return [Integer, nil] Probability model index, or nil when the
      #   byte is not a branch opcode and no bit must be decoded
      def get_prob_index(byte)
        if byte == OPCODE_CALL
          # E8 uses one model per preceding byte
          2 + @prev_byte
        elsif byte == OPCODE_JUMP
          1
        elsif @prev_byte == JCC_PREFIX && (byte & 0xF0) == 0x80
          # Second byte of a conditional near jump (0F 80..8F)
          0
        end
      end

      # Read 32-bit address from call or jump stream.
      #
      # CALL (E8) targets come from the call stream; every other branch
      # opcode reads from the jump stream. Must be called while @ip still
      # points at the opcode byte.
      #
      # @param opcode [Integer] Branch opcode that was just copied
      # @return [Integer] Relative operand (may be negative)
      def read_address(opcode)
        from_call = opcode == OPCODE_CALL
        stream = from_call ? @streams.call : @streams.jump
        stream_pos = from_call ? @call_pos : @jump_pos

        # Read 4 bytes (big-endian in stream, stored as absolute).
        # A truncated stream yields the bytes that are present, with the
        # missing low-order bytes left as zero.
        addr = 0
        4.times do |i|
          break if stream_pos >= stream.bytesize

          addr |= stream.getbyte(stream_pos) << (24 - (i * 8))
          stream_pos += 1
        end

        # Update stream position
        if from_call
          @call_pos = stream_pos
        else
          @jump_pos = stream_pos
        end

        # Convert back to relative: the operand is relative to the end of
        # the 5-byte instruction (opcode + 4 operand bytes).
        addr - (@ip + 5)
      end

      # Encode 32-bit integer as little-endian bytes.
      #
      # @param value [Integer] Value to encode (wrapped to 32 bits)
      # @return [String] 4-byte string
      def encode_int32_le(value)
        [value & UINT32_MASK].pack("V")
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "constants"
20
+ require_relative "stream_data"
21
+
22
module Omnizip
  module Filters
    # BCJ2 encoder - would split data into the 4 BCJ2 streams.
    #
    # NOTE: this class is a placeholder. BCJ2 encoding is not implemented
    # yet; #encode always raises. A real implementation needs:
    # - a range encoder
    # - probability model management
    # - stream splitting logic
    # - relative-to-absolute address conversion
    class Bcj2Encoder
      include Bcj2Constants

      # Store the input for a future encoder implementation.
      #
      # @param data [String] Binary data to encode
      # @param position [Integer] Starting instruction pointer
      def initialize(data, position = 0)
        @position = position
        @data = data
      end

      # Encode data into 4 streams (currently unsupported).
      #
      # @raise [NotImplementedError] always, encoding is not implemented
      # @return [Bcj2StreamData] The 4 output streams (once implemented)
      def encode
        message = "BCJ2 encoding is not yet implemented. " \
                  "BCJ2 is primarily used for decompression. " \
                  "For compression, use the simpler BCJ-x86 filter."
        raise NotImplementedError, message
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "constants"
20
+
21
module Omnizip
  module Filters
    # Container for the 4 BCJ2 streams.
    #
    # The streams are:
    # - main: Non-convertible bytes
    # - call: CALL (E8) instruction addresses
    # - jump: JUMP (E9) instruction addresses
    # - rc:   Range coder probability data
    #
    # Each stream can be reached through its accessor or through the
    # numeric STREAM_* index with #[] / #[]=.
    class Bcj2StreamData
      include Bcj2Constants

      attr_accessor :main, :call, :jump, :rc

      # Create the container with four empty binary strings.
      #
      # @return [Bcj2StreamData] New stream data instance
      def initialize
        @main, @call, @jump, @rc =
          Array.new(4) { String.new(encoding: Encoding::BINARY) }
      end

      # Look up a stream by its numeric index.
      #
      # @param index [Integer] Stream index (0-3)
      # @return [String] Stream data
      # @raise [ArgumentError] If index is invalid
      def [](index)
        return @main if STREAM_MAIN == index
        return @call if STREAM_CALL == index
        return @jump if STREAM_JUMP == index
        return @rc if STREAM_RC == index

        raise ArgumentError, "Invalid stream index: #{index}"
      end

      # Replace a stream by its numeric index.
      #
      # @param index [Integer] Stream index (0-3)
      # @param data [String] Stream data
      # @return [String] The data that was set
      # @raise [ArgumentError] If index is invalid
      def []=(index, data)
        if STREAM_MAIN == index
          @main = data
        elsif STREAM_CALL == index
          @call = data
        elsif STREAM_JUMP == index
          @jump = data
        elsif STREAM_RC == index
          @rc = data
        else
          raise ArgumentError, "Invalid stream index: #{index}"
        end
      end

      # All streams in index order (main, call, jump, rc).
      #
      # @return [Array<String>] Array of 4 streams
      def to_a
        [@main, @call, @jump, @rc]
      end

      # Whether none of the streams holds any data.
      #
      # @return [Boolean] True if all streams are empty
      def empty?
        to_a.all?(&:empty?)
      end
    end
  end
end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "filter_base"
20
+ require_relative "bcj2/constants"
21
+ require_relative "bcj2/stream_data"
22
+ require_relative "bcj2/decoder"
23
+ require_relative "bcj2/encoder"
24
+
25
module Omnizip
  module Filters
    # BCJ2 filter for x86/x64 executables (4-stream variant).
    #
    # BCJ2 splits x86 executable code into 4 separate streams:
    # - Main stream: Non-convertible bytes
    # - Call stream: CALL (0xE8) instruction addresses
    # - Jump stream: JUMP (0xE9) instruction addresses
    # - RC stream: Range coder probability data
    #
    # The archive format has to store and retrieve the 4 streams
    # separately, so this filter does not operate on a single String.
    #
    # NOTE: only decoding is available; #encode always raises.
    class Bcj2 < FilterBase
      # Get metadata about this filter.
      #
      # @return [Hash] Filter metadata
      def self.metadata
        {
          name: "BCJ2",
          description: "Advanced 4-stream Branch/Call/Jump converter " \
                       "for x86/x64 executables",
          architecture: "x86/x64",
          streams: 4,
          complexity: "high",
          compression_quality: "maximum",
          limitations: "Encoding not yet implemented",
        }
      end

      # Encoding is not supported for BCJ2.
      #
      # @param _data [String] Binary data to encode (ignored)
      # @param _position [Integer] Current stream position (ignored)
      # @raise [NotImplementedError] always
      # @return [String] Encoded binary data (once implemented)
      def encode(_data, _position = 0)
        message = "BCJ2 encoding is not yet implemented. " \
                  "Use the simpler BCJ-x86 filter for compression, " \
                  "or wait for a future version with BCJ2 encoding support."
        raise NotImplementedError, message
      end

      # Decode (postprocess) BCJ2 data after decompression.
      #
      # The 4 streams must already be separated and wrapped in a
      # Bcj2StreamData object; the work is delegated to Bcj2Decoder.
      #
      # @param data [Bcj2StreamData] The 4 BCJ2 streams
      # @param position [Integer] Current stream position
      # @raise [ArgumentError] If data is not a Bcj2StreamData object
      # @return [String] Decoded binary data
      def decode(data, position = 0)
        return Bcj2Decoder.new(data, position).decode if data.is_a?(Bcj2StreamData)

        raise ArgumentError,
              "BCJ2 decode requires a Bcj2StreamData object with " \
              "4 streams. This is typically handled by the archive " \
              "format reader."
      end
    end
  end
end
@@ -0,0 +1,176 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "filter_base"
20
+
21
module Omnizip
  module Filters
    # BCJ filter for 32-bit ARM executables.
    #
    # This filter preprocesses ARM machine code by converting relative
    # addresses in BL (Branch and Link - 0xEB) instructions to absolute
    # addresses. ARM uses 4-byte aligned instructions with little-endian
    # encoding.
    #
    # The offset in ARM BL instructions is stored as a 24-bit word offset
    # (byte offset divided by 4) and is relative to PC+8, i.e. the address
    # of the instruction plus 8.
    class BcjArm < FilterBase
      # ARM BL (Branch and Link) opcode (most significant byte)
      OPCODE_BL = 0xEB

      # Size of ARM instruction (4 bytes, little-endian)
      INSTRUCTION_SIZE = 4

      # Offset mask (24-bit offset in BL instruction)
      OFFSET_MASK = 0x00FFFFFF

      # Distance between an instruction's address and the PC value its
      # branch offset is relative to.
      PC_OFFSET = 8

      # Encode (preprocess) ARM executable data for compression.
      #
      # Scans 4-byte words for BL (0xEB) opcodes and converts relative
      # word offsets to absolute word offsets. Trailing bytes that do not
      # form a whole instruction are left untouched.
      #
      # @param data [String] Binary executable data
      # @param position [Integer] Current stream position
      # @return [String] Encoded binary data
      def encode(data, position = 0)
        convert(data, position, true)
      end

      # Decode (postprocess) ARM executable data after decompression.
      #
      # Reverses the encoding by converting absolute word offsets back to
      # relative word offsets.
      #
      # @param data [String] Binary executable data
      # @param position [Integer] Current stream position
      # @return [String] Decoded binary data
      def decode(data, position = 0)
        convert(data, position, false)
      end

      class << self
        # Get metadata about this filter.
        #
        # @return [Hash] Filter metadata
        def metadata
          {
            name: "BCJ-ARM",
            description: "Branch converter for 32-bit ARM executables",
            architecture: "ARM (32-bit)",
            alignment: 4,
            endian: "little",
          }
        end
      end

      private

      # Shared implementation of #encode and #decode.
      #
      # @param data [String] Binary executable data (not modified)
      # @param position [Integer] Stream position of the first byte
      # @param encoding [Boolean] true adds the PC word offset
      #   (relative -> absolute), false subtracts it
      # @return [String] Converted copy of the data
      def convert(data, position, encoding)
        return data.dup if data.bytesize < INSTRUCTION_SIZE

        result = data.b
        # Only whole instructions are examined
        limit = data.bytesize & ~(INSTRUCTION_SIZE - 1)
        base = position + PC_OFFSET

        (0...limit).step(INSTRUCTION_SIZE) do |i|
          # The opcode is the most significant (last) byte of the word
          next unless result.getbyte(i + 3) == OPCODE_BL

          # PC seen by this instruction, expressed in words
          word_offset = (base + i) >> 2
          instruction = extract_uint32_le(result, i)
          instruction += encoding ? word_offset : -word_offset

          # Keep the 24-bit offset (wrapping on overflow), restore opcode
          write_uint32_le(result, i,
                          (instruction & OFFSET_MASK) | (OPCODE_BL << 24))
        end

        result
      end

      # Extract an unsigned 32-bit little-endian integer from data.
      #
      # @param data [String] Binary data
      # @param offset [Integer] Starting position
      # @return [Integer] Unsigned 32-bit integer
      def extract_uint32_le(data, offset)
        data.getbyte(offset) |
          (data.getbyte(offset + 1) << 8) |
          (data.getbyte(offset + 2) << 16) |
          (data.getbyte(offset + 3) << 24)
      end

      # Write an unsigned 32-bit little-endian integer to data.
      #
      # @param data [String] Binary data (modified in place)
      # @param offset [Integer] Starting position
      # @param value [Integer] 32-bit integer to write
      # @return [void]
      def write_uint32_le(data, offset, value)
        value &= 0xFFFFFFFF
        data.setbyte(offset, value & 0xFF)
        data.setbyte(offset + 1, (value >> 8) & 0xFF)
        data.setbyte(offset + 2, (value >> 16) & 0xFF)
        data.setbyte(offset + 3, (value >> 24) & 0xFF)
      end
    end
  end
end