omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Formats
5
+ module Rar
6
+ module Compression
7
+ module LZ77Huffman
8
+ # Huffman tree builder for dynamic compression
9
+ #
10
+ # Builds canonical Huffman trees from symbol frequencies.
11
+ # Uses priority queue (heap) algorithm to construct optimal trees.
12
+ #
13
+ # Responsibilities:
14
+ # - ONE responsibility: Build Huffman trees and generate codes
15
+ # - Collect symbol frequencies
16
+ # - Build optimal Huffman tree
17
+ # - Generate canonical Huffman codes
18
+ # - Calculate code lengths
19
class HuffmanBuilder
  # Longest code length, in bits, that this builder will ever emit.
  MAX_CODE_LENGTH = 15

  # Node of the tree under construction.
  #
  # Leaves carry a symbol; internal nodes are created with a nil symbol
  # and get their two children assigned by the builder.
  class Node
    attr_accessor :symbol, :frequency, :left, :right

    # @param symbol [Integer, nil] Symbol value (nil for internal nodes)
    # @param frequency [Integer] Weight of this node
    def initialize(symbol, frequency)
      @symbol = symbol
      @frequency = frequency
      @left = nil
      @right = nil
    end

    # @return [Boolean] true when the node has no children
    def leaf?
      @left.nil? && @right.nil?
    end
  end

  # @return [Hash<Integer, Integer>] symbol => accumulated frequency
  attr_reader :frequencies

  def initialize
    @frequencies = Hash.new(0)
  end

  # Add symbol occurrence(s)
  #
  # @param symbol [Integer] Symbol value
  # @param count [Integer] Number of occurrences
  # @return [Integer] Updated frequency of the symbol
  def add_symbol(symbol, count = 1)
    @frequencies[symbol] += count
  end

  # Build Huffman tree from frequencies
  #
  # Keeps the pending nodes in an array sorted by frequency and
  # repeatedly merges the two lightest nodes until one root remains.
  # The tree is NOT depth-limited; see #code_lengths for the limited
  # lengths.
  #
  # @return [Node, nil] Root node, a single leaf when only one symbol
  #   is known, or nil if no symbol was added
  def build_tree
    return nil if @frequencies.empty?

    if @frequencies.size == 1
      symbol, frequency = @frequencies.first
      return Node.new(symbol, frequency)
    end

    queue = @frequencies.map { |symbol, freq| Node.new(symbol, freq) }
    queue.sort_by!(&:frequency)

    while queue.size > 1
      left = queue.shift
      right = queue.shift

      parent = Node.new(nil, left.frequency + right.frequency)
      parent.left = left
      parent.right = right

      insert_into_heap(queue, parent)
    end

    queue.first
  end

  # Generate canonical Huffman codes
  #
  # Codes of the same length are consecutive integers, ordered by
  # symbol value. A single-symbol alphabet yields the code [0, 1].
  #
  # @return [Hash<Integer, Array(Integer, Integer)>] symbol => [code, length]
  # @raise [ArgumentError] if the alphabet cannot fit into
  #   MAX_CODE_LENGTH bits
  def generate_codes
    generate_canonical_codes(code_lengths)
  end

  # Get code lengths only (for header transmission)
  #
  # The lengths always describe a valid prefix code: when the optimal
  # tree is deeper than MAX_CODE_LENGTH, lengths are redistributed
  # rather than merely truncated.
  #
  # @return [Hash<Integer, Integer>] symbol => length
  # @raise [ArgumentError] if the alphabet cannot fit into
  #   MAX_CODE_LENGTH bits
  def code_lengths
    root = build_tree
    return {} if root.nil?
    return { root.symbol => 1 } if root.leaf?

    lengths = {}
    calculate_code_lengths(root, 0, lengths)
    limit_code_lengths(lengths)
  end

  # Reset builder
  #
  # @return [void]
  def reset
    @frequencies.clear
  end

  # Check if empty
  #
  # @return [Boolean]
  def empty?
    @frequencies.empty?
  end

  # Get number of symbols
  #
  # @return [Integer]
  def symbol_count
    @frequencies.size
  end

  private

  # Insert node into the frequency-sorted array, before any node of
  # equal frequency.
  #
  # @param heap [Array<Node>] Nodes sorted by ascending frequency
  # @param node [Node] Node to insert
  # @return [void]
  def insert_into_heap(heap, node)
    index = heap.bsearch_index { |n| n.frequency >= node.frequency }
    heap.insert(index || heap.size, node)
  end

  # Record the raw tree depth of every leaf.
  #
  # @param node [Node, nil] Current node
  # @param depth [Integer] Depth of the current node
  # @param lengths [Hash] Output hash, symbol => depth
  # @return [void]
  def calculate_code_lengths(node, depth, lengths)
    return if node.nil?

    if node.leaf?
      lengths[node.symbol] = depth
    else
      calculate_code_lengths(node.left, depth + 1, lengths)
      calculate_code_lengths(node.right, depth + 1, lengths)
    end
  end

  # Force every length to be at most MAX_CODE_LENGTH while keeping the
  # set of lengths a valid prefix code.
  #
  # Simply truncating over-long lengths over-subscribes the code space
  # (Kraft sum > 1), which makes the canonical codes collide. After
  # truncation this method therefore lengthens other codes until the
  # Kraft sum fits again. The result may be slightly sub-optimal but is
  # always decodable.
  #
  # @param lengths [Hash<Integer, Integer>] symbol => raw depth (modified)
  # @return [Hash<Integer, Integer>] symbol => limited length
  # @raise [ArgumentError] if there are more symbols than
  #   MAX_CODE_LENGTH-bit codes
  def limit_code_lengths(lengths)
    return lengths if lengths.each_value.all? { |len| len <= MAX_CODE_LENGTH }

    lengths.transform_values! { |len| [len, MAX_CODE_LENGTH].min }

    # Kraft sum scaled by 2**MAX_CODE_LENGTH so it stays an Integer.
    budget = 1 << MAX_CODE_LENGTH
    used = lengths.each_value.sum { |len| 1 << (MAX_CODE_LENGTH - len) }

    while used > budget
      # Lengthen the deepest code that can still grow, preferring the
      # rarest symbol on ties: that costs the fewest output bits.
      candidate = lengths
        .select { |_sym, len| len < MAX_CODE_LENGTH }
        .max_by { |sym, len| [len, -@frequencies[sym]] }
      if candidate.nil?
        raise ArgumentError,
              "Too many symbols for #{MAX_CODE_LENGTH}-bit Huffman codes"
      end

      symbol, len = candidate
      lengths[symbol] = len + 1
      used -= 1 << (MAX_CODE_LENGTH - len - 1)
    end

    lengths
  end

  # Generate canonical codes from code lengths
  #
  # Canonical codes have the property that codes of the same
  # length are sequential integers.
  #
  # @param code_lengths [Hash<Integer, Integer>] symbol => length
  # @return [Hash<Integer, Array(Integer, Integer)>] symbol => [code, length]
  def generate_canonical_codes(code_lengths)
    return {} if code_lengths.empty?

    # Count symbols at each length
    length_counts = Array.new(MAX_CODE_LENGTH + 1, 0)
    code_lengths.each_value { |len| length_counts[len] += 1 }

    # First code of each length: previous first code plus the number of
    # codes of the previous length, shifted left by one bit.
    next_codes = Array.new(MAX_CODE_LENGTH + 1, 0)
    code = 0
    (1..MAX_CODE_LENGTH).each do |len|
      next_codes[len] = code
      code = (code + length_counts[len]) << 1
    end

    # Hand out codes in (length, symbol) order
    ordered = code_lengths.sort_by { |sym, len| [len, sym] }
    ordered.each_with_object({}) do |(symbol, length), codes|
      codes[symbol] = [next_codes[length], length]
      next_codes[length] += 1
    end
  end
end
213
+ end
214
+ end
215
+ end
216
+ end
217
+ end
@@ -0,0 +1,189 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ module Omnizip
24
+ module Formats
25
+ module Rar
26
+ module Compression
27
+ module LZ77Huffman
28
+ # Huffman coding for RAR LZ77+Huffman compression
29
+ #
30
+ # Implements canonical Huffman tree decoding for RAR archives.
31
+ # RAR uses multiple Huffman tables:
32
+ # - MC (Main Code): Literals and length codes
33
+ # - LD (Length-Distance): Distance codes
34
+ # - RC (Repeat Count): Run-length encoding
35
+ # - LDD (Low Distance): Low distance values
36
+ #
37
+ # Responsibilities:
38
+ # - ONE responsibility: Huffman tree operations
39
+ # - Build canonical Huffman trees from code lengths
40
+ # - Decode symbols using Huffman trees
41
+ # - Parse tree structure from bit stream
42
+ #
43
+ # Canonical Huffman Code Properties:
44
+ # - Codes of same length are sequential
45
+ # - Shorter codes have lower values
46
+ # - Deterministic tree construction from lengths
47
class HuffmanCoder
  # Maximum code length for RAR
  MAX_CODE_LENGTH = 15

  # Initialize Huffman coder with no codes defined
  def initialize
    @decode_table = {}
    @encode_table = {}
    @code_lengths = []
  end

  # Build Huffman tree from code lengths
  #
  # Assigns canonical codes: symbols are visited in index order and,
  # within each length, receive consecutive code values. A length of 0
  # means the symbol is unused. Any previously built tables are
  # replaced.
  #
  # @param code_lengths [Array<Integer>] Code length for each symbol,
  #   indexed by symbol, each 0..MAX_CODE_LENGTH
  # @return [void]
  # @raise [ArgumentError] if a length is not an Integer in
  #   0..MAX_CODE_LENGTH
  def build_tree(code_lengths)
    # Validate before touching state so a bad table leaves the coder usable.
    validate_code_lengths(code_lengths)

    @code_lengths = code_lengths
    @decode_table = {}
    @encode_table = {}

    # Count codes of each length
    length_counts = Array.new(MAX_CODE_LENGTH + 1, 0)
    code_lengths.each { |len| length_counts[len] += 1 if len.positive? }

    # Calculate first code for each length
    next_codes = Array.new(MAX_CODE_LENGTH + 1, 0)
    code = 0
    (1..MAX_CODE_LENGTH).each do |len|
      next_codes[len] = code
      code = (code + length_counts[len]) << 1
    end

    # Assign codes to symbols, filling both lookup directions
    code_lengths.each_with_index do |len, symbol|
      next if len.zero?

      code = next_codes[len]
      next_codes[len] += 1

      @decode_table[table_key(code, len)] = symbol
      @encode_table[symbol] = [code, len]
    end
  end

  # Decode a single symbol from bit stream
  #
  # Reads bits one at a time, most significant code bit first, until
  # the accumulated (code, length) pair is a known code.
  #
  # @param bit_stream [#read_bit] Input bit stream; read_bit is expected
  #   to return 0 or 1, or nil once the stream is exhausted
  # @return [Integer, nil] Decoded symbol, or nil if the stream ended or
  #   no code matched within MAX_CODE_LENGTH bits
  def decode_symbol(bit_stream)
    code = 0

    (1..MAX_CODE_LENGTH).each do |length|
      bit = bit_stream.read_bit
      return nil if bit.nil? # ran out of input in the middle of a code

      code = (code << 1) | bit
      key = table_key(code, length)
      return @decode_table[key] if @decode_table.key?(key)
    end

    # No valid code found
    nil
  end

  # Parse Huffman tree from RAR bit stream
  #
  # Simplified MVP format: one 4-bit code length per symbol, read in
  # symbol order (real RAR additionally run-length encodes the lengths).
  #
  # @param bit_stream [#read_bits] Input bit stream
  # @param num_symbols [Integer] Number of symbols in alphabet
  # @return [void]
  # @raise [ArgumentError] if the stream yields something that is not a
  #   valid code length
  def parse_tree(bit_stream, num_symbols)
    build_tree(Array.new(num_symbols) { bit_stream.read_bits(4) })
  end

  # Check if tree is empty
  #
  # @return [Boolean] True if no codes defined
  def empty?
    @decode_table.empty?
  end

  # Get number of symbols in tree
  #
  # @return [Integer] Number of symbols that have a code
  def symbol_count
    @decode_table.size
  end

  # Reset the coder
  #
  # @return [void]
  def reset
    @decode_table = {}
    @encode_table = {}
    @code_lengths = []
  end

  # Encode a symbol (for future encoder implementation)
  #
  # @param symbol [Integer] Symbol to encode
  # @return [Array(Integer, Integer), nil] [code, length], or nil when
  #   the symbol has no code
  def encode_symbol(symbol)
    # dup so callers cannot mutate the cached pair
    @encode_table[symbol]&.dup
  end

  private

  # Pack a (code, length) pair into one Integer hash key. The length
  # occupies the low 8 bits, which is enough for MAX_CODE_LENGTH.
  def table_key(code, length)
    (code << 8) | length
  end

  # Reject lengths that would index outside the per-length tables.
  def validate_code_lengths(code_lengths)
    bad = code_lengths.find_index do |len|
      !len.is_a?(Integer) || !len.between?(0, MAX_CODE_LENGTH)
    end
    return if bad.nil?

    raise ArgumentError,
          "Code length for symbol #{bad} must be 0 to " \
          "#{MAX_CODE_LENGTH}, got #{code_lengths[bad].inspect}"
  end
end
185
+ end
186
+ end
187
+ end
188
+ end
189
+ end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Formats
5
+ module Rar
6
+ module Compression
7
+ module LZ77Huffman
8
+ # LZ77 Match Finder for RAR compression
9
class MatchFinder
  MAX_MATCH_LENGTH = 257
  MIN_MATCH_LENGTH = 3
  WINDOW_SIZE = 32768
  # Upper bound on both the stored chain per hash and the number of
  # candidates examined per query.
  MAX_CHAIN_LENGTH = 1024

  # A back-reference: copy +length+ bytes starting +offset+ bytes back.
  class Match
    attr_reader :offset, :length

    def initialize(offset, length)
      @offset = offset
      @length = length
    end

    # Value equality; anything that is not a Match is simply unequal.
    def ==(other)
      other.is_a?(Match) && offset == other.offset && length == other.length
    end
  end

  attr_reader :window_size, :max_match_length

  # @param window_size [Integer] Largest offset a match may have
  # @param max_match_length [Integer] Longest match to report, capped
  #   at MAX_MATCH_LENGTH
  def initialize(window_size = WINDOW_SIZE,
                 max_match_length = MAX_MATCH_LENGTH)
    @window_size = window_size
    @max_match_length = [max_match_length, MAX_MATCH_LENGTH].min
    @hash_table = {}
    @last_indexed = -1
  end

  # Find the longest earlier occurrence of the bytes at +position+.
  #
  # Positions before +position+ that were not indexed yet are indexed
  # first; +position+ itself is indexed after the search. Among equally
  # long candidates the nearest one wins.
  #
  # @param data [String, Array<Integer>] Input; Strings are addressed
  #   by byte
  # @param position [Integer] Byte position to match at
  # @return [Match, nil] Best match of at least MIN_MATCH_LENGTH bytes
  def find_match(data, position)
    return nil if byte_length(data) - position < MIN_MATCH_LENGTH

    ensure_indexed(data, position)

    candidates = @hash_table.fetch(hash_bytes(data, position), [])
    best_match = find_best_among_candidates(data, position, candidates)
    index_position(data, position)
    best_match
  end

  # Index +position+ (and any not-yet-indexed earlier positions)
  # without searching, e.g. for bytes covered by an emitted match.
  #
  # @param data [String, Array<Integer>] Input
  # @param position [Integer] Byte position to index
  # @return [void]
  def update(data, position)
    return if position >= byte_length(data)

    ensure_indexed(data, position)
    index_position(data, position)
  end

  # Forget everything indexed so far. Call before feeding new data.
  #
  # @return [void]
  def reset
    @hash_table.clear
    @last_indexed = -1
  end

  # @return [Integer] Number of distinct 3-byte hashes currently indexed
  def hash_chain_count
    @hash_table.size
  end

  private

  # Length of +data+ in bytes. String#size counts characters, which is
  # wrong for multibyte encodings, so Strings use bytesize.
  def byte_length(data)
    data.is_a?(String) ? data.bytesize : data.size
  end

  # Byte at +index+ without materialising String#bytes on every call.
  def byte_at(data, index)
    data.is_a?(String) ? data.getbyte(index) : data[index]
  end

  # Index every position in (@last_indexed, position).
  def ensure_indexed(data, position)
    ((@last_indexed + 1)...position).each do |pos|
      index_position(data, pos)
    end
  end

  # Append +pos+ to its hash chain exactly once, keeping chains in
  # ascending order and bounded by MAX_CHAIN_LENGTH.
  def index_position(data, pos)
    return if pos <= @last_indexed

    @last_indexed = pos
    # The last two positions cannot start a 3-byte match; skip them.
    return if pos + MIN_MATCH_LENGTH > byte_length(data)

    chain = (@hash_table[hash_bytes(data, pos)] ||= [])
    chain << pos
    chain.shift if chain.size > MAX_CHAIN_LENGTH
  end

  # Hash of the three bytes starting at +position+ (0 near the end).
  def hash_bytes(data, position)
    return 0 if position + 2 >= byte_length(data)

    (byte_at(data, position) << 16) ^
      (byte_at(data, position + 1) << 8) ^
      byte_at(data, position + 2)
  end

  # Walk the chain from newest to oldest and keep the longest match.
  def find_best_among_candidates(data, position, candidates)
    best_length = MIN_MATCH_LENGTH - 1
    best_offset = 0
    checked = 0

    candidates.reverse_each do |candidate_pos|
      offset = position - candidate_pos
      # A candidate at or after the query position is not a back-reference.
      next unless offset.positive?
      break if offset > @window_size

      checked += 1
      break if checked > MAX_CHAIN_LENGTH

      length = match_length(data, position, candidate_pos)
      next unless length > best_length

      best_length = length
      best_offset = offset
      break if best_length >= @max_match_length
    end

    return nil if best_length < MIN_MATCH_LENGTH

    Match.new(best_offset, best_length)
  end

  # Number of equal bytes at pos1 and pos2, capped at the match limit
  # and the end of data. The two ranges may overlap.
  def match_length(data, pos1, pos2)
    limit = [byte_length(data) - pos1, @max_match_length].min
    length = 0
    length += 1 while length < limit &&
                      byte_at(data, pos1 + length) == byte_at(data, pos2 + length)
    length
  end
end
131
+ end
132
+ end
133
+ end
134
+ end
135
+ end
@@ -0,0 +1,165 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ module Omnizip
24
+ module Formats
25
+ module Rar
26
+ module Compression
27
+ module LZ77Huffman
28
+ # Sliding window buffer for LZ77 compression
29
+ #
30
+ # Provides a circular buffer that stores previously decoded bytes
31
+ # for LZ77 match copying. The window allows looking back at
32
+ # previously decoded data to resolve distance-length match pairs.
33
+ #
34
+ # Responsibilities:
35
+ # - ONE responsibility: Window buffer management
36
+ # - Store decoded bytes in circular buffer
37
+ # - Copy matches from window offset
38
+ # - Handle window wrap-around
39
+ # - Efficient lookback for match resolution
40
+ #
41
+ # RAR LZ77 Window Sizes:
42
+ # - RAR3: 32KB (32 * 1024 bytes)
43
+ # - RAR4: 64KB (64 * 1024 bytes)
44
+ # - RAR5: Up to 1GB (dynamic)
45
class SlidingWindow
  # Default window size (64KB for RAR4)
  DEFAULT_SIZE = 64 * 1024

  # position: index of the next slot to be written (0 to size-1)
  # size:     capacity of the window in bytes
  attr_reader :position, :size

  # Create a zero-filled window.
  #
  # @param size [Integer] Window size in bytes
  # @raise [ArgumentError] if size is not positive
  def initialize(size = DEFAULT_SIZE)
    raise ArgumentError, "Window size must be positive" unless size.positive?

    @size = size
    @buffer = Array.new(size, 0)
    @position = 0
  end

  # Write one byte at the current position and advance, wrapping to 0
  # once the end of the buffer is reached.
  #
  # @param byte [Integer] Byte value (0-255)
  # @return [void]
  # @raise [ArgumentError] if byte is outside 0-255
  def add_byte(byte)
    raise ArgumentError, "Byte must be 0-255" unless byte.between?(0, 255)

    @buffer[@position] = byte
    @position = (@position + 1) % @size
  end

  # Resolve an LZ77 match.
  #
  # Each byte is read +distance+ slots behind the write position and
  # immediately written back to the window, so a match whose length
  # exceeds its distance repeats the bytes it has just produced.
  #
  # @param distance [Integer] Backward offset (1 to window size)
  # @param length [Integer] Number of bytes to copy (1+)
  # @return [Array<Integer>] Copied bytes
  # @raise [ArgumentError] if distance or length is out of range
  def copy_match(distance, length)
    validate_match_params(distance, length)

    length.times.map do
      # The write position moves by one per byte, so the source slot
      # is always the same distance behind it.
      copied = @buffer[(@position - distance) % @size]
      add_byte(copied)
      copied
    end
  end

  # Look at a previously written byte without modifying the window.
  #
  # @param offset [Integer] Backward offset (1 to window size)
  # @return [Integer] Byte value at offset
  # @raise [ArgumentError] if offset is out of range
  def get_byte_at_offset(offset)
    raise ArgumentError, "Offset must be 1 to #{@size}" unless offset.between?(1, @size)

    @buffer[(@position - offset) % @size]
  end

  # Zero the buffer and rewind the write position.
  #
  # @return [void]
  def reset
    @buffer.fill(0)
    @position = 0
  end

  private

  # Check distance first, then length.
  #
  # @param distance [Integer] Distance parameter
  # @param length [Integer] Length parameter
  # @return [void]
  # @raise [ArgumentError] on the first parameter that is out of range
  def validate_match_params(distance, length)
    raise ArgumentError, "Distance must be 1 to #{@size}" unless distance.between?(1, @size)
    raise ArgumentError, "Length must be positive" unless length.positive?
  end
end
161
+ end
162
+ end
163
+ end
164
+ end
165
+ end