omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Formats
5
+ module Rar
6
+ module Compression
7
+ # Algorithm dispatcher for RAR compression
8
+ #
9
+ # Selects appropriate compression algorithm based on RAR method
10
+ # and dispatches to correct encoder/decoder.
11
+ #
12
+ # Responsibilities:
13
+ # - Algorithm selection based on compression method
14
+ # - Dispatch to appropriate decoder/encoder
15
+ # - Error handling for unsupported/unknown methods
16
+ #
17
+ # Note: Does NOT perform actual compression/decompression
18
+ # (delegated to decoder/encoder classes)
19
class Dispatcher
  # RAR compression methods
  METHOD_STORE = 0x30 # No compression
  METHOD_FASTEST = 0x31 # LZ77+Huffman (fast)
  METHOD_FAST = 0x32 # LZ77+Huffman
  METHOD_NORMAL = 0x33 # LZ77+Huffman (default)
  METHOD_GOOD = 0x34 # LZ77+Huffman or PPMd
  METHOD_BEST = 0x35 # PPMd

  # Custom errors
  class UnsupportedMethodError < StandardError; end
  class DecompressionError < StandardError; end
  class CompressionError < StandardError; end

  class << self
    # Decompress data using the algorithm that matches +method+.
    #
    # Any StandardError raised while decoding is re-raised as
    # DecompressionError carrying the original message.
    # UnsupportedMethodError is passed through unchanged.
    #
    # @param method [Integer] RAR compression method (0x30-0x35)
    # @param input [IO] Input stream
    # @param output [IO] Output stream
    # @param options [Hash] Decoder options
    # @return [Object] whatever the selected decode routine returns
    # @raise [UnsupportedMethodError] if method is unknown (including
    #   non-Integer values such as nil)
    # @raise [DecompressionError] if decompression fails
    def decompress(method, input, output, options = {})
      case method
      when METHOD_STORE
        decompress_store(input, output)
      when METHOD_FASTEST, METHOD_FAST, METHOD_NORMAL
        decompress_lz77_huffman(input, output, options)
      when METHOD_GOOD
        decompress_good(input, output, options)
      when METHOD_BEST
        decompress_ppmd(input, output, options)
      else
        raise UnsupportedMethodError, unknown_method_message(method)
      end
    rescue UnsupportedMethodError
      raise
    rescue StandardError => e
      raise DecompressionError, "Decompression failed: #{e.message}"
    end

    # Compress data using the algorithm that matches +method+.
    #
    # Any StandardError raised while encoding is re-raised as
    # CompressionError carrying the original message.
    # UnsupportedMethodError is passed through unchanged, and so is
    # NotImplementedError: it descends from ScriptError, not
    # StandardError, so the rescue below never intercepts it.
    #
    # @param method [Integer] RAR compression method
    # @param input [IO] Input stream
    # @param output [IO] Output stream
    # @param options [Hash] Encoder options
    # @return [Object] whatever the selected encode routine returns
    # @raise [UnsupportedMethodError] if method is unknown (including
    #   non-Integer values such as nil)
    # @raise [CompressionError] if compression fails
    # @raise [NotImplementedError] for methods not yet implemented
    def compress(method, input, output, options = {})
      case method
      when METHOD_STORE
        compress_store(input, output)
      when METHOD_FASTEST, METHOD_FAST, METHOD_NORMAL
        compress_lz77_huffman(input, output, options)
      when METHOD_GOOD
        compress_good(input, output, options)
      when METHOD_BEST
        compress_ppmd(input, output, options)
      else
        raise UnsupportedMethodError, unknown_method_message(method)
      end
    rescue UnsupportedMethodError
      raise
    rescue StandardError => e
      raise CompressionError, "Compression failed: #{e.message}"
    end

    private

    # Build the message used for every "unknown method" error.
    #
    # Integers keep the historical hexadecimal form (e.g. "0x99").
    # Other values are shown via #inspect, because calling to_s(16) on
    # them would raise ArgumentError and hide the real problem.
    #
    # @param method [Object] The rejected method identifier
    # @return [String] Error message
    def unknown_method_message(method)
      label = method.is_a?(Integer) ? "0x#{method.to_s(16).upcase}" : method.inspect
      "Unknown compression method: #{label}"
    end

    # Decompress METHOD_STORE (no compression)
    #
    # @param input [IO] Input stream
    # @param output [IO] Output stream
    def decompress_store(input, output)
      # Stored data is copied through verbatim
      ::IO.copy_stream(input, output)
    end

    # Decompress using LZ77+Huffman decoder
    #
    # @param input [IO] Input stream
    # @param output [IO] Output stream
    # @param options [Hash] Decoder options
    def decompress_lz77_huffman(input, output, options)
      # Loaded lazily so the decoder is only required when used
      require_relative "lz77_huffman/decoder"

      decoder = LZ77Huffman::Decoder.new(input, options)
      output.write(decoder.decode)
    end

    # Decompress METHOD_GOOD (adaptive)
    #
    # For now this always uses LZ77+Huffman.
    #
    # @param input [IO] Input stream
    # @param output [IO] Output stream
    # @param options [Hash] Decoder options
    def decompress_good(input, output, options)
      # TODO: Implement content-based algorithm selection
      decompress_lz77_huffman(input, output, options)
    end

    # Decompress using PPMd decoder
    #
    # @param input [IO] Input stream
    # @param output [IO] Output stream
    # @param options [Hash] Decoder options
    def decompress_ppmd(input, output, options)
      # Loaded lazily so the decoder is only required when used
      require_relative "ppmd/decoder"

      decoder = PPMd::Decoder.new(input, options)
      output.write(decoder.decode_stream)
    end

    # Compress METHOD_STORE (no compression)
    #
    # @param input [IO] Input stream
    # @param output [IO] Output stream
    def compress_store(input, output)
      # Stored data is copied through verbatim
      ::IO.copy_stream(input, output)
    end

    # Compress using LZ77+Huffman encoder
    #
    # @param input [IO] Input stream
    # @param output [IO] Output stream
    # @param options [Hash] Encoder options
    def compress_lz77_huffman(input, output, options)
      # Loaded lazily so the encoder is only required when used
      require_relative "lz77_huffman/encoder"

      encoder = LZ77Huffman::Encoder.new(output, options)
      encoder.encode(input)
    end

    # Compress METHOD_GOOD (adaptive)
    #
    # For now this always uses LZ77+Huffman.
    #
    # @param input [IO] Input stream
    # @param output [IO] Output stream
    # @param options [Hash] Encoder options
    def compress_good(input, output, options)
      # TODO: Implement content-based algorithm selection
      compress_lz77_huffman(input, output, options)
    end

    # Compress using PPMd encoder
    #
    # @param input [IO] Input stream
    # @param output [IO] Output stream
    # @param options [Hash] Encoder options
    def compress_ppmd(input, output, options)
      # Loaded lazily so the encoder is only required when used
      require_relative "ppmd/encoder"

      encoder = PPMd::Encoder.new(output, options)
      encoder.encode_stream(input)
    end

    # Select decoder class for method (for testing)
    #
    # @param method [Integer] Compression method
    # @return [Class, nil] Decoder class or nil for METHOD_STORE
    # @raise [UnsupportedMethodError] if method is unknown
    def select_decoder(method)
      case method
      when METHOD_STORE
        nil
      when METHOD_FASTEST, METHOD_FAST, METHOD_NORMAL, METHOD_GOOD
        require_relative "lz77_huffman/decoder"
        LZ77Huffman::Decoder
      when METHOD_BEST
        require_relative "ppmd/decoder"
        PPMd::Decoder
      else
        raise UnsupportedMethodError, unknown_method_message(method)
      end
    end
  end
end
214
+ end
215
+ end
216
+ end
217
+ end
@@ -0,0 +1,216 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "../bit_stream"
24
+ require_relative "sliding_window"
25
+ require_relative "huffman_coder"
26
+
27
+ module Omnizip
28
+ module Formats
29
+ module Rar
30
+ module Compression
31
+ module LZ77Huffman
32
+ # RAR LZ77+Huffman decoder
33
+ #
34
+ # Orchestrates the decoding of RAR METHOD_NORMAL compressed data.
35
+ # Combines Huffman coding with LZ77 sliding window compression.
36
+ #
37
+ # Responsibilities:
38
+ # - ONE responsibility: Orchestrate LZ77+Huffman decoding
39
+ # - Parse Huffman trees from bit stream
40
+ # - Decode symbols using Huffman coder
41
+ # - Process LZ77 matches via sliding window
42
+ # - Manage decoder state and output
43
+ #
44
+ # RAR LZ77+Huffman Format:
45
+ # 1. Block header with Huffman tree definitions
46
+ # 2. Compressed data stream
47
+ # 3. Symbols: literals (0-255), matches (length+distance), end marker
48
class Decoder
  # Symbol ranges
  LITERAL_SYMBOLS = (0..255)
  END_OF_BLOCK = 256
  MATCH_SYMBOLS = (257..511)

  # Match parameters
  MIN_MATCH_LENGTH = 3
  MAX_MATCH_LENGTH = 257

  # Window size for RAR4
  DEFAULT_WINDOW_SIZE = 64 * 1024

  # Number of raw bits read for every match distance (matches the
  # 16-bit offset written by the encoder for the 64KB window)
  DISTANCE_BITS = 16

  # Initialize LZ77+Huffman decoder
  #
  # @param input [IO] Compressed input stream
  # @param options [Hash] Decoding options
  # @option options [Integer] :window_size Window size in bytes
  def initialize(input, options = {})
    @bit_stream = BitStream.new(input, :read)
    @window = SlidingWindow.new(options[:window_size] || DEFAULT_WINDOW_SIZE)
    @huffman = HuffmanCoder.new
    @output = String.new(encoding: Encoding::BINARY)
  end

  # Decode compressed data
  #
  # Steps:
  # 1. Parse Huffman tree (simplified for MVP)
  # 2. Decode symbols until end-of-block, a nil symbol, or the limit
  # 3. Process literals and matches
  #
  # Decoding is best effort: if the input ends early (EOFError) the
  # bytes decoded so far are returned instead of raising.
  #
  # The limit is checked before each symbol, so a match can push the
  # buffer past +max_output+; the buffer is therefore truncated to
  # +max_output+ bytes before it is returned.
  #
  # @param max_output [Integer, nil] Maximum output bytes
  # @return [String] Decoded data (binary; the decoder's own buffer,
  #   which is cleared again by the next call to #decode)
  def decode(max_output = nil)
    @output.clear

    begin
      parse_huffman_trees
      decode_symbols(max_output)
    rescue EOFError
      # Truncated input: fall through and return the partial output
    end

    enforce_output_limit(max_output)
  end

  # Get window size
  #
  # @return [Integer] Window size in bytes
  def window_size
    @window.size
  end

  private

  # Decode symbols until end-of-block, a nil symbol, or the limit.
  #
  # @param max_output [Integer, nil] Maximum output bytes
  # @return [void]
  def decode_symbols(max_output)
    loop do
      break if max_output && @output.bytesize >= max_output

      symbol = @huffman.decode_symbol(@bit_stream)
      break if symbol.nil? || symbol == END_OF_BLOCK

      process_symbol(symbol)
    end
  end

  # Cut the output buffer down to the requested limit.
  #
  # @param max_output [Integer, nil] Maximum output bytes
  # @return [String] The (possibly truncated) output buffer
  def enforce_output_limit(max_output)
    return @output unless max_output

    limit = [max_output, 0].max
    @output.replace(@output.byteslice(0, limit)) if @output.bytesize > limit
    @output
  end

  # Parse Huffman trees from bit stream
  #
  # RAR uses multiple Huffman tables for different symbol types.
  # This is a simplified implementation for MVP.
  #
  # Simplified format (written by Encoder):
  # 1. 16-bit number of symbols (always 512 for MVP)
  # 2. Code lengths (4 bits each, 512 × 4 bits = 2048 bits = 256 bytes)
  #
  # Real RAR format:
  # - MC table: Main code (literals + length codes)
  # - LD table: Low distance bits
  # - RC table: Repeat codes
  # - LDD table: Low distance for distance codes
  #
  # @return [void]
  def parse_huffman_trees
    # 16-bit header holds the symbol count written by the encoder
    num_symbols = @bit_stream.read_bits(16)
    @huffman.parse_tree(@bit_stream, num_symbols)
  end

  # Process a decoded symbol
  #
  # Symbol types:
  # - 0-255: Literal byte
  # - 256: End of block (handled by the caller)
  # - 257-511: Match (length+distance)
  #
  # Symbols outside these ranges are ignored.
  #
  # @param symbol [Integer] Decoded symbol
  # @return [void]
  def process_symbol(symbol)
    if LITERAL_SYMBOLS.cover?(symbol)
      process_literal(symbol)
    elsif MATCH_SYMBOLS.cover?(symbol)
      process_match(symbol)
    end
  end

  # Process literal byte
  #
  # @param byte [Integer] Literal byte value (0-255)
  # @return [void]
  def process_literal(byte)
    @output << byte.chr
    @window.add_byte(byte)
  end

  # Process LZ77 match
  #
  # The symbol carries the length; the distance follows in the bit
  # stream. The bytes themselves come from the sliding window.
  #
  # @param symbol [Integer] Match symbol (257-511)
  # @return [void]
  def process_match(symbol)
    length = decode_match_length(symbol)
    distance = decode_match_distance

    match_bytes = @window.copy_match(distance, length)
    match_bytes.each { |byte| @output << byte.chr }
  end

  # Decode match length from symbol
  #
  # Simplified for MVP: the length is derived from the symbol alone
  # (real RAR adds extra bits) and capped at MAX_MATCH_LENGTH.
  #
  # @param symbol [Integer] Match symbol
  # @return [Integer] Match length
  def decode_match_length(symbol)
    base_length = symbol - MATCH_SYMBOLS.begin + MIN_MATCH_LENGTH
    [base_length, MAX_MATCH_LENGTH].min
  end

  # Decode match distance
  #
  # Simplified for MVP: the distance is read as DISTANCE_BITS raw
  # bits (real RAR uses a separate distance Huffman table).
  #
  # @return [Integer] Match distance
  def decode_match_distance
    @bit_stream.read_bits(DISTANCE_BITS)
  end
end
212
+ end
213
+ end
214
+ end
215
+ end
216
+ end
@@ -0,0 +1,158 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../bit_stream"
4
+ require_relative "match_finder"
5
+ require_relative "huffman_builder"
6
+
7
+ module Omnizip
8
+ module Formats
9
+ module Rar
10
+ module Compression
11
+ module LZ77Huffman
12
+ # RAR LZ77+Huffman encoder
13
+ #
14
+ # Implements compression using LZ77 string matching combined with
15
+ # Huffman coding for symbol encoding.
16
+ #
17
+ # ## Simplified Huffman Tree Format (MVP)
18
+ #
19
+ # This implementation uses a simplified tree format for portability
20
+ # and ease of implementation. The format differs from official RAR
21
+ # but maintains full compatibility between encoder and decoder.
22
+ #
23
+ # ### Format Structure:
24
+ # ```
25
+ # [16-bit num_symbols] [code_lengths...]
26
+ # 2 bytes 512 × 4 bits = 256 bytes
27
+ # ```
28
+ #
29
+ # ### Details:
30
+ # - **Header**: 16-bit number of symbols (always 512 for MVP)
31
+ # - 0-255: Literal bytes
32
+ # - 256: End-of-block marker
33
+ # - 257-511: LZ77 match symbols
34
+ #
35
+ # - **Code Lengths**: 4 bits per symbol × 512 symbols = 2048 bits
36
+ # - Each symbol gets a 4-bit code length (0-15)
37
+ # - Length 0 means symbol not used
38
+ # - Lengths build canonical Huffman tree
39
+ #
40
+ # ### Trade-offs:
41
+ # - **Fixed Overhead**: 258 bytes (2 + 256) per block
42
+ # - **Simplicity**: Easy to implement and debug
43
+ # - **Portability**: Pure Ruby, no external dependencies
44
+ # - **Compatibility**: Encoder/decoder use identical format
45
+ #
46
+ # ### Real RAR Format Differences:
47
+ # Real RAR uses a more complex format with:
48
+ # - RLE compression of code lengths
49
+ # - Multiple Huffman tables (MC, LD, RC, LDD)
50
+ # - Adaptive tree updates
51
+ # - More efficient length encoding
52
+ #
53
+ # The simplified format is sufficient for MVP and can be upgraded
54
+ # to full RAR format in future versions without breaking the API.
55
+ #
56
+ # @see Decoder for decoding implementation
57
+ # @see HuffmanCoder for tree building
58
+ # @see HuffmanBuilder for code generation
59
class Encoder
  LITERAL_SYMBOLS = (0..255)
  END_OF_BLOCK = 256
  MATCH_SYMBOLS = (257..511)
  MIN_MATCH_LENGTH = 3
  MAX_MATCH_LENGTH = 257

  # @return [Integer] Difference in output position across the last
  #   #encode call (0 before any data has been encoded)
  attr_reader :compressed_size

  # Initialize LZ77+Huffman encoder
  #
  # @param output [IO] Destination stream; must respond to #pos
  # @param _options [Hash] Encoder options (currently unused)
  def initialize(output, _options = {})
    @output = output
    @bit_stream = BitStream.new(output, :write)
    @match_finder = MatchFinder.new
    @huffman_builder = HuffmanBuilder.new
    @compressed_size = 0
  end

  # Encode +input+ as one block: Huffman tree header, the coded
  # literal/match items, then the end-of-block symbol.
  #
  # Empty input writes nothing.
  #
  # @param input [String, IO] Data to compress; anything that is not
  #   a String is read in full via #read
  # @return [Integer] Compressed size in bytes (0 for empty input)
  def encode(input)
    data = input.is_a?(String) ? input : input.read
    return 0 if data.nil? || data.empty?

    start_pos = @output.pos
    items = collect_items(data)
    codes = @huffman_builder.generate_codes
    write_huffman_tree(codes)

    items.each do |item|
      if item[:type] == :literal
        encode_literal(item[:value], codes)
      else
        encode_match(item[:offset], item[:length], codes)
      end
    end

    encode_symbol(END_OF_BLOCK, codes)
    @bit_stream.flush
    @compressed_size = @output.pos - start_pos
  end

  private

  # Split the input into literal and match items and feed every
  # emitted symbol to the Huffman builder.
  #
  # The input is walked byte by byte (not character by character), so
  # multibyte strings are encoded in full. Match lengths are capped at
  # MAX_MATCH_LENGTH so that the cursor advances by exactly the length
  # the match symbol can express.
  #
  # @param data [String] Raw input
  # @return [Array<Hash>] Items: { type: :literal, value: } or
  #   { type: :match, offset:, length: }
  def collect_items(data)
    bytes = data.bytes # materialised once; reused for every lookup
    items = []
    position = 0

    while position < bytes.size
      match = @match_finder.find_match(bytes, position)

      if match && match.length >= MIN_MATCH_LENGTH
        length = [match.length, MAX_MATCH_LENGTH].min
        items << { type: :match, offset: match.offset, length: length }
        @huffman_builder.add_symbol(encode_match_symbol(length))
        position += length
      else
        byte = bytes[position]
        items << { type: :literal, value: byte }
        @huffman_builder.add_symbol(byte)
        position += 1
      end
    end

    @huffman_builder.add_symbol(END_OF_BLOCK)
    items
  end

  # Write the simplified tree header: a 16-bit symbol count (512)
  # followed by one 4-bit code length per symbol (0 = unused).
  #
  # @param codes [Hash{Integer => Array(Integer, Integer)}] symbol to
  #   [code, length]
  # @return [void]
  def write_huffman_tree(codes)
    lengths = Array.new(512, 0)
    codes.each { |symbol, (_code, length)| lengths[symbol] = length }
    @bit_stream.write_bits(512, 16)
    lengths.each { |length| @bit_stream.write_bits(length, 4) }
  end

  # Emit a literal byte.
  #
  # @param byte [Integer] Literal byte value (0-255)
  # @param codes [Hash] symbol to [code, length]
  # @return [void]
  def encode_literal(byte, codes)
    encode_symbol(byte, codes)
  end

  # Emit a match: its length symbol followed by the offset as 16 raw
  # bits (sized for the 64KB window).
  #
  # @param offset [Integer] Match offset
  # @param length [Integer] Match length
  # @param codes [Hash] symbol to [code, length]
  # @return [void]
  def encode_match(offset, length, codes)
    encode_symbol(encode_match_symbol(length), codes)
    @bit_stream.write_bits(offset, 16)
  end

  # Map a match length onto its symbol, saturating at the last match
  # symbol (511).
  #
  # @param length [Integer] Match length
  # @return [Integer] Match symbol (257-511 for lengths >= 3)
  def encode_match_symbol(length)
    symbol = MATCH_SYMBOLS.begin + length - MIN_MATCH_LENGTH
    [symbol, MATCH_SYMBOLS.end].min
  end

  # Write the Huffman code for +symbol+. Symbols without an entry in
  # +codes+ are skipped.
  #
  # @param symbol [Integer] Symbol to emit
  # @param codes [Hash] symbol to [code, length]
  # @return [void]
  def encode_symbol(symbol, codes)
    code, length = codes[symbol]
    return unless code && length

    @bit_stream.write_bits(code, length)
  end
end
154
+ end
155
+ end
156
+ end
157
+ end
158
+ end