omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,176 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "filter_base"
20
+
21
+ module Omnizip
22
+ module Filters
23
# BCJ filter for SPARC executables.
#
# SPARC instructions are 4 bytes wide and stored big-endian. The filter
# looks at every aligned 32-bit word and converts those that look like a
# CALL whose word displacement fits in 23 signed bits, i.e. words in
# 0x40000000..0x403FFFFF (displacement >= 0) or 0x7FC00000..0x7FFFFFFF
# (displacement < 0). For such a word the PC-relative displacement is
# replaced by one measured from the start of the stream (encode), or the
# other way round (decode), so repeated calls to the same target produce
# identical byte patterns.
#
# The converted value is always folded back into the same two word
# ranges. That is what makes the transformation reversible: the decoder
# recognises exactly the words the encoder produced.
class BcjSparc < FilterBase
  # Size of SPARC instruction (4 bytes, big-endian)
  INSTRUCTION_SIZE = 4

  # Bit 22 of the instruction word: sign bit of the 23-bit displacement
  FLAG = 1 << 22

  # Mask selecting the low 25 bits of a byte address
  # (23-bit word displacement shifted left by two)
  OFFSET_MASK = (FLAG << 3) - 1

  # Opcode bits of the converted instruction format (bits 31..30 = 01)
  CALL_OPCODE = 1 << 30

  # Bits 22..29 of the word; all set when the displacement is negative
  SIGN_FILL = (CALL_OPCODE - 1) & ~(FLAG - 1)

  # Encode (preprocess) SPARC executable data for compression.
  #
  # @param data [String] Binary executable data (not modified)
  # @param position [Integer] Stream offset of the first byte of +data+;
  #   NOTE(review): expected to be a multiple of 4, otherwise the low
  #   address bits are lost in the conversion - confirm with callers
  # @return [String] New binary string with convertible instructions
  #   rewritten; input shorter than one instruction is returned as a
  #   plain copy
  def encode(data, position = 0)
    convert(data, position, encoding: true)
  end

  # Decode (postprocess) SPARC executable data after decompression.
  #
  # Exact inverse of {#encode} when called with the same +position+.
  #
  # @param data [String] Binary data produced by {#encode}
  # @param position [Integer] Stream offset of the first byte of +data+
  # @return [String] New binary string with the original instructions
  def decode(data, position = 0)
    convert(data, position, encoding: false)
  end

  class << self
    # Get metadata about this filter.
    #
    # @return [Hash] Filter metadata
    def metadata
      {
        name: "BCJ-SPARC",
        description: "Branch converter for SPARC executables",
        architecture: "SPARC",
        alignment: 4,
        endian: "big",
      }
    end
  end

  private

  # Shared implementation of encode/decode.
  #
  # @param data [String] Input bytes
  # @param position [Integer] Stream offset of +data+
  # @param encoding [Boolean] true adds the program counter, false
  #   subtracts it
  # @return [String] Converted copy of +data+
  def convert(data, position, encoding:)
    return data.dup if data.bytesize < INSTRUCTION_SIZE

    result = data.b
    # Trailing bytes that do not form a whole instruction are left as is.
    aligned_size = data.bytesize & ~(INSTRUCTION_SIZE - 1)

    (0...aligned_size).step(INSTRUCTION_SIZE) do |i|
      instruction = extract_uint32_be(result, i)
      next unless convertible_call?(instruction)

      pc = position + i
      address = instruction << 2
      target = encoding ? address + pc : address - pc
      write_uint32_be(result, i, pack_call(target))
    end

    result
  end

  # Whether the top ten bits of the word are 0100000000 or 0111111111.
  #
  # Tested on the word itself rather than via wrap-around arithmetic, so
  # Ruby's unbounded Integers cannot leak bits above bit 31 into the
  # comparison.
  #
  # @param instruction [Integer] Unsigned 32-bit instruction word
  # @return [Boolean]
  def convertible_call?(instruction)
    prefix = instruction & ~(FLAG - 1)
    prefix == CALL_OPCODE || prefix == (CALL_OPCODE | SIGN_FILL)
  end

  # Build the instruction word for a converted byte address.
  #
  # Keeps the low 23 bits of the word displacement and copies its sign
  # bit (bit 22) into bits 22..29, so the result always satisfies
  # {#convertible_call?}.
  #
  # @param target [Integer] Byte address; may be negative or wider than
  #   32 bits, only bits 2..24 are used
  # @return [Integer] Unsigned 32-bit instruction word
  def pack_call(target)
    field = (target & OFFSET_MASK) >> 2
    field |= SIGN_FILL if field.anybits?(FLAG)
    CALL_OPCODE | field
  end

  # Extract an unsigned 32-bit big-endian integer from data.
  #
  # @param data [String] Binary data
  # @param offset [Integer] Starting position
  # @return [Integer] Unsigned 32-bit integer
  def extract_uint32_be(data, offset)
    data.byteslice(offset, INSTRUCTION_SIZE).unpack1("N")
  end

  # Write an unsigned 32-bit big-endian integer to data.
  #
  # @param data [String] Binary data (modified in place)
  # @param offset [Integer] Starting position
  # @param value [Integer] Integer to write; truncated to 32 bits
  # @return [void]
  def write_uint32_be(data, offset, value)
    value &= 0xFFFFFFFF
    data.setbyte(offset, (value >> 24) & 0xFF)
    data.setbyte(offset + 1, (value >> 16) & 0xFF)
    data.setbyte(offset + 2, (value >> 8) & 0xFF)
    data.setbyte(offset + 3, value & 0xFF)
  end
end
175
+ end
176
+ end
@@ -0,0 +1,193 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "filter_base"
20
+
21
+ module Omnizip
22
+ module Filters
23
# BCJ (Branch/Call/Jump) filter for x86/x64 executables.
#
# The filter scans for CALL (0xE8) and JMP (0xE9) opcodes. When the 4-byte
# little-endian operand that follows has a high byte of 0x00 or 0xFF (a
# displacement that fits in 25 signed bits), the displacement relative to
# the end of the instruction is replaced by one measured from the start
# of the stream (encode), or the other way round (decode). Identical
# call targets then yield identical byte sequences, which compress
# better.
#
# Reversibility relies on two rules applied in both directions:
# * the decision to convert is made on the operand as stored in the
#   input, and
# * the converted value is folded back to a sign-extended 25-bit number,
#   so its high byte is again 0x00 or 0xFF.
#
# NOTE(review): the reference 7-Zip x86 coder additionally tracks recently
# seen opcode bytes to avoid false positives; that heuristic is not
# implemented here, so output is not guaranteed to be byte-identical to
# 7-Zip's - confirm before relying on interoperability.
class BcjX86 < FilterBase
  # x86 CALL opcode
  OPCODE_CALL = 0xE8

  # x86 JMP opcode
  OPCODE_JMP = 0xE9

  # Size of x86 address (4 bytes, little-endian)
  ADDRESS_SIZE = 4

  # Instruction size (opcode + address)
  INSTRUCTION_SIZE = 5

  # Sign bit of the 25-bit displacement kept by the filter
  OPERAND_SIGN_BIT = 1 << 24

  # Encode (preprocess) x86 executable data for compression.
  #
  # @param data [String] Binary executable data (not modified)
  # @param position [Integer] Stream offset of the first byte of +data+
  # @return [String] New binary string with convertible operands made
  #   absolute; input shorter than one instruction is returned as a
  #   plain copy
  def encode(data, position = 0)
    convert(data, position, encoding: true)
  end

  # Decode (postprocess) x86 executable data after decompression.
  #
  # Exact inverse of {#encode} when called with the same +position+.
  #
  # @param data [String] Binary data produced by {#encode}
  # @param position [Integer] Stream offset of the first byte of +data+
  # @return [String] New binary string with the original operands
  def decode(data, position = 0)
    convert(data, position, encoding: false)
  end

  class << self
    # Get metadata about this filter.
    #
    # @return [Hash] Filter metadata
    def metadata
      {
        name: "BCJ-x86",
        description: "Branch/Call/Jump converter for x86/x64 " \
                     "executables",
        architecture: "x86/x64",
      }
    end
  end

  private

  # Shared implementation of encode/decode.
  #
  # @param data [String] Input bytes
  # @param position [Integer] Stream offset of +data+
  # @param encoding [Boolean] true adds the address of the next
  #   instruction, false subtracts it
  # @return [String] Converted copy of +data+
  def convert(data, position, encoding:)
    return data.dup if data.bytesize < INSTRUCTION_SIZE

    result = data.b
    limit = result.bytesize - INSTRUCTION_SIZE
    i = 0

    while i <= limit
      opcode = result.getbyte(i)
      unless opcode == OPCODE_CALL || opcode == OPCODE_JMP
        i += 1
        next
      end

      stored = extract_int32_le(result, i + 1)
      # Test the stored operand in BOTH directions; testing the computed
      # value on decode would touch operands the encoder left alone.
      if valid_relative_address?(stored)
        next_ip = position + i + INSTRUCTION_SIZE
        converted = encoding ? stored + next_ip : stored - next_ip
        write_int32_le(result, i + 1, fold_operand(converted))
      end

      # The operand bytes are skipped whether or not they were converted,
      # which keeps encoder and decoder scanning in lock-step.
      i += INSTRUCTION_SIZE
    end

    result
  end

  # Reduce a value to 25 signed bits, sign-extended to 32 bits.
  #
  # @param value [Integer] Converted address (any sign, any width)
  # @return [Integer] Unsigned 32-bit value whose high byte is 0x00 or
  #   0xFF
  def fold_operand(value)
    low = value & (OPERAND_SIGN_BIT - 1)
    value.anybits?(OPERAND_SIGN_BIT) ? low | 0xFF000000 : low
  end

  # Extract a signed 32-bit little-endian integer from data.
  #
  # @param data [String] Binary data
  # @param offset [Integer] Starting position
  # @return [Integer] Signed 32-bit integer
  def extract_int32_le(data, offset)
    data.byteslice(offset, ADDRESS_SIZE).unpack1("l<")
  end

  # Write a 32-bit little-endian integer to data.
  #
  # @param data [String] Binary data (modified in place)
  # @param offset [Integer] Starting position
  # @param value [Integer] Integer to write; truncated to 32 bits
  # @return [void]
  def write_int32_le(data, offset, value)
    unsigned = value & 0xFFFFFFFF

    data.setbyte(offset, unsigned & 0xFF)
    data.setbyte(offset + 1, (unsigned >> 8) & 0xFF)
    data.setbyte(offset + 2, (unsigned >> 16) & 0xFF)
    data.setbyte(offset + 3, (unsigned >> 24) & 0xFF)
  end

  # Check if an operand is convertible.
  #
  # Only operands whose most significant byte is 0x00 (small positive)
  # or 0xFF (small negative) are treated as branch displacements.
  #
  # @param value [Integer] Operand value (signed or unsigned)
  # @return [Boolean] True if the operand should be converted
  def valid_relative_address?(value)
    high_byte = (value >> 24) & 0xFF
    high_byte.zero? || high_byte == 0xFF
  end
end
192
+ end
193
+ end
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024-2025 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ require_relative "../filter"
20
+
21
+ module Omnizip
22
+ module Filters
23
# Delta filter for multimedia and database preprocessing.
#
# Each byte is replaced by its difference from the byte +distance+
# positions earlier. Data whose samples change slowly (WAV audio, BMP
# pixels, fixed-width records) turns into runs of small values that
# compress better.
#
# All arithmetic wraps modulo 256, so the transformation is lossless and
# {#decode} is the exact inverse of {#encode}.
#
# Filter IDs by format:
# - XZ: 0x03
# - 7-Zip: 0x03
class Delta < ::Omnizip::Filter
  # Default distance for delta calculation (audio/single channel)
  DEFAULT_DISTANCE = 1

  # Maximum allowed distance value
  MAX_DISTANCE = 256

  # Byte modulo for wrap-around arithmetic
  BYTE_MODULO = 256

  # Filter ID for XZ format
  XZ_FILTER_ID = 0x03

  # Filter ID for 7-Zip format
  SEVEN_ZIP_FILTER_ID = 0x03

  attr_reader :distance

  # Build a Delta filter.
  #
  # The distance may be given positionally or by keyword:
  #   Delta.new(3)
  #   Delta.new(distance: 3)
  #
  # The keyword wins whenever it differs from DEFAULT_DISTANCE; otherwise
  # the positional value is used.
  #
  # @param distance_arg [Integer] Distance, positional form
  # @param distance [Integer] Distance, keyword form. Typical values:
  #   - 1: Audio/single channel data
  #   - 2: Stereo 16-bit audio
  #   - 3: RGB image data (24-bit)
  #   - 4: RGBA image data (32-bit) or 32-bit integers
  # @raise [ArgumentError] If the selected distance is invalid
  def initialize(distance_arg = DEFAULT_DISTANCE, distance: DEFAULT_DISTANCE)
    chosen = distance == DEFAULT_DISTANCE ? distance_arg : distance

    validate_distance(chosen)
    @distance = chosen
    super(architecture: :delta, name: "Delta")
  end

  # Look up the numeric filter ID used by a container format.
  #
  # @param format [Symbol] Format identifier (:seven_zip, :xz)
  # @return [Integer] Format-specific filter ID
  # @raise [ArgumentError] If format is not supported
  def id_for_format(format)
    ids = { seven_zip: SEVEN_ZIP_FILTER_ID, xz: XZ_FILTER_ID }

    ids.fetch(format) do
      raise ArgumentError,
            "Unknown format: #{format}. Supported: :seven_zip, :xz"
    end
  end

  # Encode (preprocess) data by computing forward differences.
  #
  #   out[i] = (in[i] - in[i - distance]) mod 256   for i >= distance
  #
  # The first +distance+ bytes have no predecessor and are copied as is.
  #
  # @param data [String] Binary data to encode (not modified)
  # @param _position [Integer] Stream position; ignored by this filter
  # @return [String] Encoded binary data
  def encode(data, _position = 0)
    return data.dup if data.empty?

    encoded = data.b
    gap = distance

    # Differences are taken against the untouched input, never against
    # bytes already written to +encoded+.
    (gap...data.bytesize).each do |idx|
      delta = data.getbyte(idx) - data.getbyte(idx - gap)
      encoded.setbyte(idx, delta & 0xFF)
    end

    encoded
  end

  # Decode (postprocess) data by restoring from differences.
  #
  #   out[i] = (in[i] + out[i - distance]) mod 256   for i >= distance
  #
  # The first +distance+ bytes are already original values.
  #
  # @param data [String] Binary data to decode (not modified)
  # @param _position [Integer] Stream position; ignored by this filter
  # @return [String] Decoded binary data
  def decode(data, _position = 0)
    return data.dup if data.empty?

    restored = data.b
    gap = distance

    # Walking forward, restored[idx] still holds the difference while
    # restored[idx - gap] already holds the recovered original byte.
    (gap...data.bytesize).each do |idx|
      sum = restored.getbyte(idx) + restored.getbyte(idx - gap)
      restored.setbyte(idx, sum & 0xFF)
    end

    restored
  end

  # Get metadata about this filter.
  #
  # @return [Hash] Filter metadata
  def self.metadata
    {
      name: "Delta",
      description: "Byte-wise difference filter for multimedia " \
                   "and database preprocessing",
      typical_usage: "WAV audio, BMP images, database dumps",
    }
  end

  private

  # Reject distances that are not Integers in 1..MAX_DISTANCE.
  #
  # @param dist [Integer] Distance value to validate
  # @raise [ArgumentError] If distance is invalid
  # @return [void]
  def validate_distance(dist)
    raise ArgumentError, "Distance must be an integer" unless dist.is_a?(Integer)
    raise ArgumentError, "Distance must be at least 1, got #{dist}" if dist < 1

    too_large = dist > MAX_DISTANCE
    raise ArgumentError, "Distance must not exceed #{MAX_DISTANCE}, got #{dist}" if too_large
  end
end
195
+ end
196
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (C) 2024 Ribose Inc.
5
+ #
6
+ # This file is part of Omnizip.
7
+ #
8
+ # Omnizip is a pure Ruby port of 7-Zip compression algorithms.
9
+ # Based on the 7-Zip LZMA SDK by Igor Pavlov.
10
+ #
11
+ # This library is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU Lesser General Public
13
+ # License as published by the Free Software Foundation; either
14
+ # version 2.1 of the License, or (at your option) any later version.
15
+ #
16
+ # See the COPYING file for the complete text of the license.
17
+ #
18
+
19
+ module Omnizip
20
+ module Filters
21
# Abstract base class for preprocessing filters.
#
# A filter is a reversible byte transformation run before compression
# (and undone after decompression) to make the data more compressible.
#
# This class only fixes the interface: every method here raises
# NotImplementedError, and concrete filters are expected to override
# #encode, #decode and .metadata.
class FilterBase
  # Encode (preprocess) data for compression.
  #
  # Implementations must be reversible: decode(encode(data)) == data.
  #
  # @param data [String] Binary data to encode
  # @param position [Integer] Current position in stream (for
  #   multi-block filtering)
  # @raise [NotImplementedError] Always, unless overridden by a subclass
  # @return [String] Encoded binary data
  def encode(data, position = 0)
    raise NotImplementedError, "#{self.class} must implement #encode"
  end

  # Decode (postprocess) data after decompression.
  #
  # Implementations undo the transformation applied by #encode.
  #
  # @param data [String] Binary data to decode
  # @param position [Integer] Current position in stream (for
  #   multi-block filtering)
  # @raise [NotImplementedError] Always, unless overridden by a subclass
  # @return [String] Decoded binary data
  def decode(data, position = 0)
    raise NotImplementedError, "#{self.class} must implement #decode"
  end

  # Get metadata about this filter.
  #
  # @raise [NotImplementedError] Always, unless overridden by a subclass
  # @return [Hash] Filter metadata including name, description
  def self.metadata
    raise NotImplementedError, "#{self} must implement .metadata"
  end
end
71
+ end
72
+ end