omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require_relative "constants"
5
+ require_relative "huffman_coder"
6
+
7
+ module Omnizip
8
+ module Algorithms
9
+ class Deflate64
10
+ # Deflate64 decoder
11
+ class Decoder
12
+ include Constants
13
+
14
+ attr_reader :window_size
15
+
# Create a decoder bound to a compressed input stream.
#
# The sliding window starts empty and its capacity is taken from the
# DICTIONARY_SIZE constant.
#
# @param input_stream [#read] Source of the serialized compressed data
def initialize(input_stream)
  @huffman = HuffmanCoder.new
  @window = []
  @window_size = DICTIONARY_SIZE
  @input_stream = input_stream
end
22
+
# Decompress the input stream into +output_stream+.
#
# The whole input is read into memory, split into the two serialized
# Huffman trees plus the payload, Huffman-decoded into LZ77 tokens and
# finally expanded back into bytes.
#
# @param output_stream [IO] Output data stream
# @return [Object] Whatever +output_stream.write+ returns
def decompress(output_stream)
  literal_tree, distance_tree, payload =
    deserialize_with_trees(@input_stream.read)

  tokens = @huffman.decode(payload, literal_tree, distance_tree)

  output_stream.write(reconstruct_from_tokens(tokens))
end
40
+
# Split serialized data into its Huffman trees and compressed payload.
#
# Layout: two big-endian 32-bit byte counts (literal tree size, distance
# tree size), followed by the literal tree JSON, the distance tree JSON
# and finally the Huffman-coded payload.
#
# All slicing is done by byte offset, because the header stores byte
# counts; character-based slicing would drift on input whose encoding
# groups several bytes into one character.
#
# @param data [String] Serialized compressed data
# @return [Array] Literal tree, distance tree, compressed data
# @raise [Omnizip::DecompressionError] If the data is too short to hold
#   the header or the trees the header announces
def deserialize_with_trees(data)
  header_size = 8

  if data.nil? || data.bytesize < header_size
    raise Omnizip::DecompressionError,
          "Truncated Deflate64 data: missing tree size header"
  end

  literal_size, distance_size = data.unpack("NN")
  payload_offset = header_size + literal_size + distance_size

  if payload_offset > data.bytesize
    raise Omnizip::DecompressionError,
          "Truncated Deflate64 data: trees need #{payload_offset} bytes, " \
          "got #{data.bytesize}"
  end

  literal_json = data.byteslice(header_size, literal_size)
  distance_json = data.byteslice(header_size + literal_size, distance_size)
  compressed = data.byteslice(payload_offset, data.bytesize - payload_offset)

  [
    parse_tree_from_json(literal_json),
    parse_tree_from_json(distance_json),
    compressed,
  ]
end
67
+
# Rebuild a Huffman code table from its JSON form.
#
# JSON object keys are always strings, so every key is converted back
# with +to_i+ to recover the integer symbols.
#
# @param json [String] JSON string
# @return [Hash] Huffman tree with integer keys
def parse_tree_from_json(json)
  code_table = JSON.parse(json)
  code_table.transform_keys { |symbol| symbol.to_i }
end
77
+
# Expand LZ77 tokens back into the original byte sequence.
#
# Literal tokens contribute their byte directly; match tokens are
# resolved against the sliding window. Tokens of any other type are
# ignored. After every token the window is trimmed from the front so
# it never holds more than +@window_size+ bytes.
#
# @param tokens [Array<Hash>] LZ77 tokens
# @return [String] Decompressed data (binary encoded)
def reconstruct_from_tokens(tokens)
  pieces = []

  tokens.each do |token|
    case token[:type]
    when :literal
      byte = token[:value]
      pieces << byte.chr(Encoding::BINARY)
      @window << byte
    when :match
      copy_from_window(pieces, token[:distance], token[:length])
    end

    # Drop the oldest bytes that no longer fit in the window.
    overflow = @window.size - @window_size
    @window.shift(overflow) if overflow.positive?
  end

  pieces.join.force_encoding(Encoding::BINARY)
end
106
+
# Decode a single block and expand its tokens.
#
# NOTE(review): the block is decoded with empty literal and distance
# trees, so with the HuffmanCoder in this package any non-empty +data+
# cannot resolve a symbol — confirm whether callers are expected to
# supply trees some other way.
#
# @param data [String] Compressed block
# @return [String] Decompressed data
def decode_block(data)
  reconstruct_from_tokens(@huffman.decode(data, {}, {}))
end
115
+
116
+ private
117
+
# Copy a back-reference out of the sliding window.
#
# Each copied byte is appended to both +output+ and the window, so a
# match may overlap the bytes it is producing (the run-length case where
# +length+ is greater than +distance+).
#
# The source byte is always addressed relative to the current end of the
# window. That keeps the copy correct when the window is full and its
# oldest byte is dropped after every append; an absolute start index
# would drift by one position per copied byte in that situation.
#
# @param output [Array] Output buffer
# @param distance [Integer] Distance back in window (1 = last byte)
# @param length [Integer] Number of bytes to copy
# @raise [Omnizip::DecompressionError] If +distance+ is not positive or
#   reaches back further than the bytes currently held in the window
def copy_from_window(output, distance, length)
  if distance < 1
    raise Omnizip::DecompressionError,
          "Invalid distance: #{distance} must be at least 1"
  end

  if distance > @window.size
    raise Omnizip::DecompressionError,
          "Invalid distance: #{distance} exceeds window size #{@window.size}"
  end

  length.times do
    # The window never shrinks below its size at entry, so this index
    # stays valid for the whole copy.
    byte = @window[-distance]

    output << byte.chr(Encoding::BINARY)
    @window << byte

    # Maintain window size during copy
    @window.shift if @window.size > @window_size
  end
end
150
+ end
151
+ end
152
+ end
153
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require_relative "constants"
5
+ require_relative "lz77_encoder"
6
+ require_relative "huffman_coder"
7
+
8
+ module Omnizip
9
+ module Algorithms
10
+ class Deflate64
11
+ # Deflate64 encoder
12
+ class Encoder
13
+ include Constants
14
+
15
+ attr_reader :window_size
16
+
# Create an encoder that writes to +output_stream+.
#
# Recognised options are +:window_size+ (falls back to DICTIONARY_SIZE)
# and +:level+ (falls back to 6). The level is stored but not read by
# any method of this class.
#
# @param output_stream [#write] Destination for compressed data
# @param options [Hash] Encoder options
def initialize(output_stream, options = {})
  @output_stream = output_stream
  @compression_level = options[:level] || 6
  @window_size = options[:window_size] || DICTIONARY_SIZE
  @huffman = HuffmanCoder.new
  @lz77_encoder = LZ77Encoder.new(@window_size)
end
24
+
# Compress the input stream and write the result to the output stream.
#
# The whole input is read at once, tokenized by the LZ77 stage and
# Huffman coded; the code tables built during that step are serialized
# in front of the payload so a decoder can rebuild them.
#
# @param input_stream [IO] Input data stream
# @return [Object] Whatever the output stream's +write+ returns
def compress(input_stream)
  tokens = @lz77_encoder.find_matches(input_stream.read)
  payload = @huffman.encode(tokens)

  # The trees are only available once encode has run.
  serialized = serialize_with_trees(
    payload,
    @huffman.literal_tree,
    @huffman.distance_tree,
  )

  @output_stream.write(serialized)
end
46
+
47
+ private
48
+
# Prefix the compressed payload with the JSON form of both code tables.
#
# Output layout: literal tree byte count and distance tree byte count
# (each a big-endian 32-bit integer), then the literal tree JSON, the
# distance tree JSON and the compressed bytes.
#
# @param compressed [String] Compressed data
# @param literal_tree [Hash] Literal Huffman tree
# @param distance_tree [Hash] Distance Huffman tree
# @return [String] Serialized output
def serialize_with_trees(compressed, literal_tree, distance_tree)
  tree_documents = [literal_tree, distance_tree].map(&:to_json)

  serialized = tree_documents.map(&:bytesize).pack("NN")
  # Append raw bytes only, so the text encodings of the parts never clash.
  tree_documents.each { |document| serialized << document.b }
  serialized << compressed.b
end
69
+
# Compress one block of data: LZ77 match finding, then Huffman coding.
#
# Only the Huffman-coded bytes are returned; the code tables stay on
# the HuffmanCoder instance.
#
# @param data [String] Input data
# @return [String] Compressed data
def encode_block(data)
  @huffman.encode(@lz77_encoder.find_matches(data))
end
81
+
# Compress a stream chunk by chunk, writing each encoded chunk as soon
# as it is produced.
#
# NOTE(review): each chunk is written without its Huffman trees —
# confirm how a reader is expected to decode this output.
#
# @param input_stream [IO] Input stream
# @param chunk_size [Integer] Size of chunks to process
# @return [nil]
def encode_stream(input_stream, chunk_size = 65_536)
  until input_stream.eof?
    chunk = input_stream.read(chunk_size)
    break unless chunk && !chunk.empty?

    @output_stream.write(encode_block(chunk))
  end
end
95
+ end
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,354 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+
5
+ module Omnizip
6
+ module Algorithms
7
+ class Deflate64
8
+ # Huffman coding for Deflate64
9
+ class HuffmanCoder
10
+ include Constants
11
+
12
+ # Length code mapping
13
+ LENGTH_CODES_MAP = {
14
+ 3 => 257, 4 => 258, 5 => 259, 6 => 260, 7 => 261,
15
+ 8 => 262, 9 => 263, 10 => 264, 11 => 265, 12 => 266,
16
+ 13 => 267, 14 => 268, 15 => 269, 16 => 270, 17 => 271,
17
+ 18 => 272, 19 => 273, 20 => 274, 21 => 275, 22 => 276,
18
+ 23 => 277, 24 => 278, 25 => 279, 26 => 280, 27 => 281,
19
+ 28 => 282, 29 => 283, 30 => 284, 31 => 285
20
+ }.freeze
21
+
22
+ # Distance code mapping
23
+ DISTANCE_CODES_MAP = (0..29).to_a.freeze
24
+
25
+ attr_reader :literal_tree, :distance_tree
26
+
# Start with no code tables; they are set by #encode or #decode.
def initialize
  @literal_tree = @distance_tree = nil
end
31
+
# Huffman-encode a token sequence.
#
# Fresh literal/length and distance code tables are built from the
# token frequencies and kept on the instance (readable through
# +literal_tree+ and +distance_tree+) before the tokens are written out.
#
# @param tokens [Array<Hash>] LZ77 tokens
# @return [String] Encoded bitstream
def encode(tokens)
  literal_counts = build_literal_frequencies(tokens)
  distance_counts = build_distance_frequencies(tokens)

  @literal_tree = build_tree(literal_counts)
  @distance_tree = build_tree(distance_counts)

  encode_tokens(tokens)
end
48
+
# Decode a Huffman-coded bitstream back into tokens.
#
# The supplied code tables replace whatever tables the instance held.
#
# @param bitstream [String] Encoded data
# @param literal_tree [Hash] Literal Huffman tree
# @param distance_tree [Hash] Distance Huffman tree
# @return [Array<Hash>] Decoded tokens
def decode(bitstream, literal_tree, distance_tree)
  @literal_tree, @distance_tree = literal_tree, distance_tree

  decode_tokens(bitstream)
end
61
+
62
+ private
63
+
# Count how often each literal/length symbol occurs.
#
# Literal tokens count under their byte value; every other token counts
# under the length code of its match length. The end-of-block symbol is
# always present with a count of exactly 1.
#
# @param tokens [Array<Hash>] LZ77 tokens
# @return [Hash] Frequency table (missing symbols read as 0)
def build_literal_frequencies(tokens)
  counts = tokens.each_with_object(Hash.new(0)) do |token, table|
    symbol =
      if token[:type] == :literal
        token[:value]
      else
        length_to_code(token[:length])
      end

    table[symbol] += 1
  end

  counts[END_OF_BLOCK] = 1
  counts
end
85
+
# Count how often each distance code occurs among the match tokens.
#
# Tokens that are not matches are skipped.
#
# @param tokens [Array<Hash>] LZ77 tokens
# @return [Hash] Frequency table (missing codes read as 0)
def build_distance_frequencies(tokens)
  matches = tokens.select { |token| token[:type] == :match }

  matches.each_with_object(Hash.new(0)) do |match, counts|
    counts[distance_to_code(match[:distance])] += 1
  end
end
102
+
# Build a Huffman code table from symbol frequencies.
#
# The two lowest-frequency nodes are merged repeatedly until a single
# root remains; codes are then read off the tree ("0" = left branch,
# "1" = right branch).
#
# An alphabet with exactly one symbol is given the one-bit code "0".
# A lone root would otherwise receive the empty code, which takes up no
# bits in the stream and can never be matched by a decoder that reads
# at least one bit per symbol.
#
# @param frequencies [Hash] Symbol frequencies
# @return [Hash] Huffman code table (symbol => bit string)
def build_tree(frequencies)
  return {} if frequencies.empty?

  # Build priority queue of nodes
  nodes = frequencies.map do |symbol, freq|
    { symbol: symbol, freq: freq, left: nil, right: nil }
  end

  return { nodes.first[:symbol] => "0" } if nodes.size == 1

  # Build tree using priority queue
  while nodes.size > 1
    nodes.sort_by! { |node| node[:freq] }
    left, right = nodes.shift(2)

    nodes << {
      symbol: nil,
      freq: left[:freq] + right[:freq],
      left: left,
      right: right,
    }
  end

  # Generate codes from tree
  generate_codes(nodes.first)
end
134
+
# Collect the Huffman code of every leaf below +node+.
#
# Walks the tree depth-first, left branch before right, appending "0"
# for a left step and "1" for a right step. Leaves are recorded in
# +codes+ in the order they are reached. A leaf at the root keeps the
# starting +code+ unchanged.
#
# @param node [Hash] Tree node
# @param code [String] Current code
# @param codes [Hash] Code table
# @return [Hash] Complete code table (the same object as +codes+)
def generate_codes(node, code = "", codes = {})
  pending = [[node, code]]

  until pending.empty?
    current, prefix = pending.pop
    next if current.nil?

    if current[:symbol]
      codes[current[:symbol]] = prefix
    else
      # Push right first so the left subtree is visited first.
      pending.push(
        [current[:right], "#{prefix}1"],
        [current[:left], "#{prefix}0"],
      )
    end
  end

  codes
end
153
+
# Write tokens out as a packed Huffman bitstream.
#
# A literal emits its literal-tree code. Any other token emits the
# literal-tree code of its length symbol followed by the distance-tree
# code of its distance symbol. The end-of-block code terminates the
# stream.
#
# The bit string is grown in place; rebuilding it with += for every
# token would copy the whole string each time.
#
# @param tokens [Array<Hash>] LZ77 tokens
# @return [String] Encoded bitstream
def encode_tokens(tokens)
  bits = +""

  tokens.each do |token|
    if token[:type] == :literal
      bits << @literal_tree[token[:value]]
    else
      bits << @literal_tree[length_to_code(token[:length])]
      bits << @distance_tree[distance_to_code(token[:distance])]
    end
  end

  # Add end of block marker
  bits << @literal_tree[END_OF_BLOCK]

  # Convert bits to bytes
  bits_to_bytes(bits)
end
180
+ # Decode tokens from bitstream
181
+ #
182
+ # @param bitstream [String] Encoded data
183
+ # @return [Array<Hash>] Decoded tokens
184
+ def decode_tokens(bitstream)
185
+ tokens = []
186
+ bits = bytes_to_bits(bitstream)
187
+ pos = 0
188
+
189
+ while pos < bits.length
190
+ symbol, length = decode_symbol(bits, pos, @literal_tree)
191
+
192
+ # Check for decoding failure
193
+ if symbol.nil?
194
+ raise Omnizip::DecompressionError,
195
+ "Failed to decode symbol at bit position #{pos}"
196
+ end
197
+
198
+ pos += length
199
+
200
+ break if symbol == END_OF_BLOCK
201
+
202
+ if symbol < 256
203
+ tokens << { type: :literal, value: symbol }
204
+ else
205
+ match_length = code_to_length(symbol)
206
+
207
+ dist_symbol, dist_length =
208
+ decode_symbol(bits, pos, @distance_tree)
209
+
210
+ # Check for distance decoding failure
211
+ if dist_symbol.nil?
212
+ raise Omnizip::DecompressionError,
213
+ "Failed to decode distance at bit position #{pos}"
214
+ end
215
+
216
+ pos += dist_length
217
+
218
+ distance = code_to_distance(dist_symbol)
219
+
220
+ tokens << {
221
+ type: :match,
222
+ length: match_length,
223
+ distance: distance,
224
+ }
225
+ end
226
+ end
227
+
228
+ tokens
229
+ end
230
+
# Read one Huffman symbol starting at +pos+.
#
# Bits are accumulated one at a time until they spell a code present in
# +tree+.
#
# The code => symbol lookup is built once per tree object and reused on
# later calls, instead of inverting the whole table for every symbol.
# The cache is keyed on object identity and assumes a tree is not
# mutated after it has first been used for decoding. It is emptied once
# it holds a handful of trees so it cannot grow without bound.
#
# @param bits [String] Bit string
# @param pos [Integer] Current position
# @param tree [Hash] Huffman tree (symbol => bit string)
# @return [Array] Symbol and bits consumed, or [nil, 0] when the
#   remaining bits match no code
def decode_symbol(bits, pos, tree)
  @reverse_trees ||= {}.compare_by_identity
  @reverse_trees.clear if @reverse_trees.size >= 8 && !@reverse_trees.key?(tree)
  lookup = (@reverse_trees[tree] ||= tree.invert)

  total_bits = bits.length
  code = +""

  while pos < total_bits
    code << bits[pos]
    pos += 1

    symbol = lookup[code]
    return [symbol, code.length] if symbol
  end

  [nil, 0]
end
252
+
# Map a match length to its literal/length symbol.
#
# Lengths missing from LENGTH_CODES_MAP all map to symbol 285.
# NOTE(review): such lengths share one symbol, so the original length
# cannot be recovered from it — confirm the LZ77 stage never emits them.
#
# @param length [Integer] Match length
# @return [Integer] Literal/length symbol
def length_to_code(length)
  LENGTH_CODES_MAP.fetch(length, 285)
end
257
+
# Map a literal/length symbol back to its match length.
#
# This is the reverse lookup of LENGTH_CODES_MAP; symbols that are not
# in the map yield 258.
#
# @param code [Integer] Literal/length symbol
# @return [Integer] Match length
def code_to_length(code)
  length, = LENGTH_CODES_MAP.find { |_length, symbol| symbol == code }
  length || 258
end
262
+
# Map a match distance to its distance code (0..29).
#
# Distances 1..4 map to codes 0..3. From 5 upwards every power-of-two
# band of (distance - 1) is split into two halves with one code each,
# which follows the layout of the DEFLATE distance table. Anything
# outside 1..32768 maps to code 29.
#
# NOTE(review): only the code is produced here, with no offset inside
# the code's range. The matching code_to_distance returns the first
# distance of each range, so for example 6 -> code 4 -> 5. Confirm the
# distances emitted by the LZ77 stage survive that round trip.
#
# @param distance [Integer] Match distance
# @return [Integer] Distance code
def distance_to_code(distance)
  return 29 unless distance.is_a?(Integer) && distance.between?(1, 32_768)
  return distance - 1 if distance <= 4

  offset = distance - 1
  top_bit = offset.bit_length - 1

  # Two codes per band: the bit below the top bit selects the half.
  (2 * top_bit) + ((offset >> (top_bit - 1)) & 1)
end
301
+
# Map a distance code back to a distance.
#
# Codes 0..3 give distances 1..4. For codes 4..29 the result is the
# first distance of the code's range in the DEFLATE distance table
# (base distances only; no offset within the range is applied). Any
# other code yields 1.
#
# @param code [Integer] Distance code
# @return [Integer] Distance
def code_to_distance(code)
  return 1 unless code.is_a?(Integer) && code.between?(0, 29)
  return code + 1 if code < 4

  # Codes come in pairs per band: an even code starts at 2 << n, an odd
  # code at 3 << n, both shifted up by one.
  ((2 | (code & 1)) << ((code >> 1) - 1)) + 1
end
338
+
# Pack a string of "0"/"1" characters into bytes, most significant bit
# first. A trailing group shorter than eight bits is padded with zero
# bits on the right.
#
# @param bits [String] Bit string
# @return [String] Packed bytes
def bits_to_bytes(bits)
  [bits].pack("B*")
end
346
+
# Expand bytes into a string of "0"/"1" characters, eight per byte,
# most significant bit first.
#
# @param bytes [String] Packed bytes
# @return [String] Bit string
def bytes_to_bits(bytes)
  bytes.unpack1("B*")
end
351
+ end
352
+ end
353
+ end
354
+ end