omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "../../../../algorithms/ppmd7/context"
24
+
25
+ module Omnizip
26
+ module Formats
27
+ module Rar
28
+ module Compression
29
+ module PPMd
30
# Context node used by the RAR PPMd (variant H) model.
#
# Subclasses Omnizip::Algorithms::PPMd7::Context and overrides:
# - construction: the escape frequency is set to RAR_INIT_ESCAPE_FREQ
#   once the parent initializer has run
# - #update_symbol: frequencies are rescaled as soon as the running
#   total exceeds the value returned by #rar_max_freq
#
# Everything else (state storage, tree links) comes from the parent.
class Context < Omnizip::Algorithms::PPMd7::Context
  # Escape frequency assigned to every newly built context
  RAR_INIT_ESCAPE_FREQ = 1

  # Build a context node
  #
  # @param order [Integer] Depth of this context in the tree
  # @param suffix [Context, nil] Shorter (parent) context, if any
  def initialize(order, suffix = nil)
    super(order, suffix)
    # Applied after the parent initializer so this value wins
    @escape_freq = RAR_INIT_ESCAPE_FREQ
  end

  # Bump the frequency of a symbol already present in this context
  #
  # Unknown symbols are ignored. After the bump, all frequencies are
  # rescaled if the total has grown past #rar_max_freq.
  #
  # @param symbol [Integer] Symbol whose count is increased
  # @param increment [Integer] Amount added to the count
  # @return [void]
  def update_symbol(symbol, increment = 1)
    entry = @states[symbol]
    if entry
      entry.freq += increment
      @sum_freq += increment

      rescale_frequencies if @sum_freq > rar_max_freq
    end
  end

  private

  # Total frequency above which #update_symbol triggers a rescale
  #
  # @return [Integer] The rescale threshold
  def rar_max_freq
    124
  end

  # Halve every symbol frequency (rounding up, never below 1) and
  # rebuild the running total from the new values.
  #
  # @return [void]
  def rescale_frequencies
    @sum_freq = 0
    @states.each_value do |entry|
      halved = (entry.freq + 1) / 2
      entry.freq = halved < 1 ? 1 : halved
      @sum_freq += entry.freq
    end
  end
end
101
+ end
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,219 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "../../../../algorithms/ppmd7/decoder"
24
+ require_relative "../../../../algorithms/ppmd7/model"
25
+ require_relative "context"
26
+
27
+ module Omnizip
28
+ module Formats
29
+ module Rar
30
+ module Compression
31
+ module PPMd
32
# Decoder for RAR's PPMd variant H streams.
#
# Subclasses Omnizip::Algorithms::PPMd7::Decoder but sets up its own
# state: a PPMd7 model sized from RAR-style options (memory given in
# megabytes) and an LZMA range decoder reading from the input.
#
# The symbol decoding below is a simplified stand-in: each symbol is
# read as 16 direct bits and mapped through the root context's
# frequency table. Escape handling and proper stream termination are
# not implemented yet.
class Decoder < Omnizip::Algorithms::PPMd7::Decoder
  # Accepted range and default for the model order
  RAR_MAX_ORDER = 16
  RAR_MIN_ORDER = 2
  RAR_DEFAULT_ORDER = 6

  # Bytes per megabyte, used to convert the :mem_size option
  RAR_MEM_MULTIPLIER = 1024 * 1024

  # Set up the decoder
  #
  # @param input [IO] Stream holding the compressed data
  # @param options [Hash] Decoding options
  # @option options [Integer] :model_order Maximum context order
  #   (defaults to RAR_DEFAULT_ORDER)
  # @option options [Integer] :mem_size Model memory in MB (defaults to 16)
  # @raise [ArgumentError] If the model order is out of range
  def initialize(input, options = {})
    @input = input
    @options = options

    # The option is expressed in megabytes; the model wants bytes
    memory_bytes = (options[:mem_size] || 16) * RAR_MEM_MULTIPLIER
    order = options[:model_order] || RAR_DEFAULT_ORDER
    @model = initialize_rar_model(order, memory_bytes)

    @range_decoder = Omnizip::Algorithms::LZMA::RangeDecoder.new(input)
  end

  # Decode symbols into a binary string
  #
  # Stops after the requested number of symbols (1000 when no limit is
  # given), when no symbol matches the decoded value, or when the
  # input raises EOFError / Omnizip::DecompressionError.
  #
  # @param max_bytes [Integer, nil] Maximum number of bytes to decode
  # @return [String] Decoded bytes (binary encoding)
  def decode_stream(max_bytes = nil)
    decoded = String.new(encoding: Encoding::BINARY)

    begin
      (max_bytes || 1000).times do
        byte = decode_symbol
        break if byte.nil?

        decoded << byte.chr
      end
    rescue EOFError, Omnizip::DecompressionError
      # Running out of input simply ends decoding
    end

    decoded
  end

  private

  # Validate the order and build the underlying PPMd7 model
  #
  # @param max_order [Integer] Maximum context order
  # @param memory_size [Integer] Memory size in bytes
  # @return [Omnizip::Algorithms::PPMd7::Model] Initialized model
  # @raise [ArgumentError] If max_order is outside
  #   RAR_MIN_ORDER..RAR_MAX_ORDER
  def initialize_rar_model(max_order, memory_size)
    in_range = max_order.between?(RAR_MIN_ORDER, RAR_MAX_ORDER)
    if in_range
      return Omnizip::Algorithms::PPMd7::Model.new(max_order, memory_size)
    end

    raise ArgumentError,
          "RAR max_order must be between #{RAR_MIN_ORDER} and " \
          "#{RAR_MAX_ORDER}"
  end

  # Decode one symbol and feed it back into the model
  #
  # Reads 16 direct bits, maps them to a symbol and, when a symbol was
  # found, updates the model with it.
  #
  # @return [Integer, nil] Decoded byte, or nil when nothing matched
  def decode_symbol
    raw = @range_decoder.decode_direct_bits(16)

    find_symbol_from_range(raw)&.tap { |byte| @model.update(byte) }
  end

  # Placeholder for escape-code decoding
  #
  # Always reports 0; nothing is read from the range decoder.
  #
  # @return [Integer, nil] Escape code or nil
  def decode_escape_code
    0
  end

  # Map a 16-bit value to a symbol of the model's root context
  #
  # Symbols are visited in ascending order; each owns the slice of
  # 0...0x10000 proportional to its share of the context's total
  # frequency. The first symbol whose slice contains the value wins.
  #
  # @param value [Integer] Value read from the range decoder
  # @return [Integer, nil] Matching symbol, or nil if none matches
  def find_symbol_from_range(value)
    node = @model.root_context
    lower_count = 0

    node.states.sort_by { |sym, _entry| sym }.each do |candidate, entry|
      upper_count = lower_count + entry.freq
      total = node.total_freq
      slice_start = (lower_count * 0x10000) / total
      slice_end = (upper_count * 0x10000) / total

      return candidate if value >= slice_start && value < slice_end

      lower_count = upper_count
    end

    nil
  end
end
215
+ end
216
+ end
217
+ end
218
+ end
219
+ end
@@ -0,0 +1,262 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "../../../../algorithms/ppmd7/encoder"
24
+ require_relative "../../../../algorithms/ppmd7/model"
25
+ require_relative "../../../../algorithms/lzma/range_encoder"
26
+ require_relative "context"
27
+
28
+ module Omnizip
29
+ module Formats
30
+ module Rar
31
+ module Compression
32
+ module PPMd
33
# Encoder for RAR's PPMd variant H streams.
#
# Subclasses Omnizip::Algorithms::PPMd7::Encoder but sets up its own
# state: a PPMd7 model sized from RAR-style options (memory given in
# megabytes) and an LZMA range encoder writing to the output.
#
# The bit layout produced here is a simplified stand-in: known
# symbols are written as a 16-bit scaled cumulative frequency,
# unknown ones as a 2-bit escape followed by the raw 8-bit value.
class Encoder < Omnizip::Algorithms::PPMd7::Encoder
  # Accepted range and default for the model order
  RAR_MAX_ORDER = 16
  RAR_MIN_ORDER = 2
  RAR_DEFAULT_ORDER = 6

  # Bytes per megabyte, used to convert the :mem_size option
  RAR_MEM_MULTIPLIER = 1024 * 1024

  # Memory size held by the model (exposed for tests)
  #
  # Reads the model's @mem_size instance variable directly.
  #
  # @return [Object] Whatever the model stored in @mem_size
  def memory_size
    @model.instance_variable_get(:@mem_size)
  end

  # Set up the encoder
  #
  # @param output [IO] Stream receiving the compressed data
  # @param options [Hash] Encoding options
  # @option options [Integer] :model_order Maximum context order
  #   (defaults to RAR_DEFAULT_ORDER)
  # @option options [Integer] :mem_size Model memory in MB (defaults to 16)
  # @raise [ArgumentError] If the model order is out of range
  def initialize(output, options = {})
    @output = output
    @options = options

    # The option is expressed in megabytes; the model wants bytes
    mem_size_mb = options[:mem_size] || 16
    mem_size_bytes = mem_size_mb * RAR_MEM_MULTIPLIER

    @model = initialize_rar_model(
      options[:model_order] || RAR_DEFAULT_ORDER,
      mem_size_bytes,
    )

    @range_encoder = Omnizip::Algorithms::LZMA::RangeEncoder.new(output)
  end

  # Encode bytes read from a stream
  #
  # Reads one byte at a time until the input is exhausted or
  # max_bytes have been encoded, then flushes the range encoder.
  #
  # @param input [IO] Stream to compress
  # @param max_bytes [Integer, nil] Maximum bytes to encode
  # @return [Integer] Number of bytes encoded
  def encode_stream(input, max_bytes = nil)
    bytes_encoded = 0

    loop do
      break if max_bytes && bytes_encoded >= max_bytes

      byte = input.read(1)
      break unless byte

      encode_symbol(byte.ord)
      bytes_encoded += 1
    end

    # Flush so everything buffered by the range encoder is written
    @range_encoder.flush
    bytes_encoded
  end

  private

  # Validate the order and build the underlying PPMd7 model
  #
  # @param max_order [Integer] Maximum context order
  # @param memory_size [Integer] Memory size in bytes
  # @return [Omnizip::Algorithms::PPMd7::Model] Initialized model
  # @raise [ArgumentError] If max_order is outside
  #   RAR_MIN_ORDER..RAR_MAX_ORDER
  def initialize_rar_model(max_order, memory_size)
    unless max_order.between?(RAR_MIN_ORDER, RAR_MAX_ORDER)
      raise ArgumentError,
            "RAR max_order must be between #{RAR_MIN_ORDER} and " \
            "#{RAR_MAX_ORDER}"
    end

    Omnizip::Algorithms::PPMd7::Model.new(max_order, memory_size)
  end

  # Encode one byte and update the model with it
  #
  # A byte already present in the model's current context is encoded
  # from its frequency data; otherwise an escape marker followed by
  # the raw byte is written.
  #
  # @param byte [Integer] Byte value to encode (0-255)
  # @return [void]
  def encode_symbol(byte)
    context = @model.current_context
    state = context.find_symbol(byte)

    if state
      encode_symbol_in_context(byte, state, context)
    else
      encode_escape_code
      encode_new_symbol(byte)
    end

    # Keep the model in step with what was just written
    @model.update(byte)
  end

  # Encode a symbol that exists in the given context
  #
  # The cumulative frequency is the combined frequency of every
  # symbol numerically below +byte+. It is computed independently of
  # the hash's insertion order so it matches a walk over the symbols
  # in ascending order.
  #
  # @param byte [Integer] Symbol to encode
  # @param state [SymbolState] Symbol's state
  # @param context [Context] Current context
  # @return [void]
  def encode_symbol_in_context(byte, state, context)
    cum_freq = context.states.sum do |sym, st|
      sym < byte ? st.freq : 0
    end

    encode_range(cum_freq, state.freq, context.total_freq)
  end

  # Write the escape marker announcing a symbol unknown to the context
  #
  # Placeholder: always writes the value 0 as 2 direct bits; the
  # context's escape frequency is not consulted.
  #
  # @return [void]
  def encode_escape_code
    @range_encoder.encode_direct_bits(0, 2)
  end

  # Write a symbol that is not present in the current context
  #
  # The byte is written verbatim as 8 direct bits.
  #
  # @param byte [Integer] Symbol to encode
  # @return [void]
  def encode_new_symbol(byte)
    @range_encoder.encode_direct_bits(byte, 8)
  end

  # Write the lower bound of a symbol's slice as 16 direct bits
  #
  # The cumulative frequency is scaled to 0...0x10000. Only the lower
  # bound is written; the slice width is not encoded, so +freq+ is
  # accepted for interface stability but currently unused.
  #
  # @param cum_freq [Integer] Cumulative frequency below the symbol
  # @param freq [Integer] Symbol frequency (currently unused)
  # @param total_freq [Integer] Total frequency of the context
  # @return [void]
  def encode_range(cum_freq, freq, total_freq)
    scale = 0x10000
    low = (cum_freq * scale) / total_freq

    @range_encoder.encode_direct_bits(low, 16)
  end
end
258
+ end
259
+ end
260
+ end
261
+ end
262
+ end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Formats
5
+ module Rar
6
# Registry for RAR compression methods
#
# Maps a method name to the pair of classes that compress and
# decompress it, so new methods can be added without touching the
# code that looks them up. All state lives on the class itself.
#
# @example Registering a compression method
#   CompressionMethodRegistry.register(
#     :rar3_normal,
#     Rar3::Compressor,
#     Rar3::Decompressor
#   )
#
# @example Getting a compressor
#   compressor = CompressionMethodRegistry.compressor(:rar3_normal)
class CompressionMethodRegistry
  class << self
    # Register (or replace) a compression method
    #
    # @param name [Symbol] The method name
    # @param compressor [Class] The compressor class
    # @param decompressor [Class] The decompressor class
    # @return [void]
    def register(name, compressor, decompressor)
      registry[name] = {
        compressor: compressor,
        decompressor: decompressor,
      }
    end

    # Get the compressor registered for a method
    #
    # @param name [Symbol] The method name
    # @return [Class] The compressor class
    # @raise [Error::FormatError] If method not registered
    def compressor(name)
      lookup(name, :compressor)
    end

    # Get the decompressor registered for a method
    #
    # @param name [Symbol] The method name
    # @return [Class] The decompressor class
    # @raise [Error::FormatError] If method not registered
    def decompressor(name)
      lookup(name, :decompressor)
    end

    # Check if a method is registered
    #
    # @param name [Symbol] The method name
    # @return [Boolean] True if registered
    def registered?(name)
      registry.key?(name)
    end

    # Get all registered method names
    #
    # @return [Array<Symbol>] The registered method names
    def registered_methods
      registry.keys
    end

    # Clear all registered methods (primarily for testing)
    #
    # @return [void]
    def clear
      @methods = {}
    end

    # Build the method name for a RAR version and level
    #
    # Versions whose text starts with "5" map to the "rar5" prefix,
    # everything else to "rar3". The version is converted with #to_s
    # first, so numeric versions (5, 5.0) are accepted as well.
    #
    # @param version [String, #to_s] The RAR version (e.g., "3.0", "5.0")
    # @param level [Symbol] The compression level
    # @return [Symbol] The method name, e.g. :rar5_normal
    def method_for_version(version, level)
      prefix = version.to_s.start_with?("5") ? "rar5" : "rar3"
      :"#{prefix}_#{level}"
    end

    private

    # Storage for registered methods
    #
    # Deliberately not named +methods+: a private singleton method of
    # that name would shadow Object#methods and break reflection on
    # this class.
    #
    # @return [Hash] The methods hash
    def registry
      @methods ||= {}
    end

    # Fetch one side (:compressor or :decompressor) of a registration
    #
    # @param name [Symbol] The method name
    # @param role [Symbol] :compressor or :decompressor
    # @return [Class] The registered class
    # @raise [Error::FormatError] If method not registered
    def lookup(name, role)
      entry = registry[name]
      return entry[role] if entry

      raise Error::FormatError,
            "No #{role} registered for method: #{name}"
    end
  end
end
104
+ end
105
+ end
106
+ end