omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,468 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require "stringio"
24
+ require_relative "constants"
25
+ require_relative "stream_header_parser"
26
+ require_relative "stream_footer_parser"
27
+ require_relative "block_decoder"
28
+ require_relative "index_decoder"
29
+ require_relative "../../error"
30
+
31
+ module Omnizip
32
+ module Formats
33
+ module XzFormat
34
+ # XZ Stream decoder
35
+ #
36
+ # Decodes a complete XZ stream which consists of:
37
+ # - Stream Header (12 bytes)
38
+ # - Blocks (one or more)
39
+ # - Index
40
+ # - Stream Footer (12 bytes)
41
+ #
42
+ # Reference: /tmp/xz-source/src/liblzma/common/stream_decoder.c
43
+ class StreamDecoder
44
# Decode one XZ stream and return its uncompressed payload.
#
# Steps, in order: parse the stream header, decode blocks up to the index
# marker, check the index against the decoded blocks, run the seek-based
# backward-size check when the original IO is seekable and reports a
# positive size, read the stream footer, then inspect what follows it.
#
# @param input [IO] Input stream (file, StringIO, etc.)
# @return [String] Decompressed data, tagged as binary
# @raise [FormatError] raised by the helper steps when the stream is invalid
def self.decode(input)
  check_type = StreamHeaderParser.parse(input)[:check_type]

  # Keep a handle on the caller's IO and its size: decode_blocks may hand
  # back a different IO object to continue reading from.
  source = input
  source_size = source.respond_to?(:size) ? source.size : nil

  chunks, block_count, remaining, block_sizes = decode_blocks(input, check_type)
  index = verify_index(remaining, block_count, block_sizes)
  index_size = index[:index_size]

  # The footer lookup needs random access, so it only runs on seekable
  # inputs with a known, non-zero size.
  if source.respond_to?(:seek) && source_size&.positive?
    validate_backward_size_from_footer(source, source_size, index_size)
  end

  # Consume the footer so the position ends up just past this stream,
  # then check whatever comes after it.
  read_stream_footer(remaining, check_type, index_size)
  verify_no_trailing_data(remaining)

  chunks.join.force_encoding(Encoding::BINARY)
end
80
+
81
# Decode consecutive blocks until the index indicator byte is reached.
#
# Each iteration peeks one byte: the index indicator ends the loop (the byte
# is pushed back for the index parser); anything else is handed to
# decode_block as the first byte of a block header.
#
# @param input [IO] Input stream
# @param check_type [Symbol] Checksum type
# @return [Array(Array<String>, Integer, IO, Array<Hash>)] decoded chunks,
#   number of blocks, the IO to keep reading from, and the per-block
#   {unpadded_size, uncompressed_size} hashes used for index validation
# @raise [FormatError] if the stream ends before the index indicator
def self.decode_blocks(input, check_type)
  chunks = []
  sizes = []
  stream = input

  loop do
    first = stream.getbyte
    raise FormatError, "Unexpected end of stream" if first.nil?

    if first == XzConst::INDEX_INDICATOR
      restore_byte_for_index(stream, first)
      break
    end

    data, decoder = decode_block(stream, first, check_type)
    chunks << data

    # Only blocks that report both sizes take part in index validation.
    unpadded = decoder.unpadded_size
    uncompressed = unpadded && decoder.uncompressed_size
    if unpadded && uncompressed
      sizes << { unpadded_size: unpadded, uncompressed_size: uncompressed }
    end

    # The block decoder may have switched to a replacement IO; follow it.
    replacement = decoder.new_input_after_block
    stream = replacement if replacement
  end

  [chunks, chunks.size, stream, sizes]
end
119
+
120
# Push the peeked index-indicator byte back so the index parser sees the
# index from its first byte. Delegates to restore_byte.
#
# @param input [IO] Input stream
# @param peek_byte [Integer] Byte to restore
# @return whatever restore_byte returns
def self.restore_byte_for_index(input, peek_byte)
  restore_byte(input, peek_byte)
end
127
+
128
# Decode one block, starting from an already-peeked first byte.
#
# The peeked byte is pushed back first so the block decoder reads the block
# from its beginning.
#
# @param input [IO] Input stream
# @param peek_byte [Integer] Peeked byte
# @param check_type [Symbol] Checksum type
# @return [Array(String, BlockDecoder)] decoded data and the decoder used
def self.decode_block(input, peek_byte, check_type)
  restore_byte(input, peek_byte)

  block_decoder = BlockDecoder.new(input, check_type)
  [block_decoder.decode, block_decoder]
end
140
+
141
# Push a single byte back onto the input stream.
#
# @param input [IO] Input stream
# @param byte [Integer] Byte to restore
# @return the result of input.ungetbyte
# @raise [FormatError] If the IO does not respond to ungetbyte
def self.restore_byte(input, byte)
  unless input.respond_to?(:ungetbyte)
    raise FormatError,
          "IO object doesn't support ungetbyte - cannot parse stream"
  end

  input.ungetbyte(byte)
end
152
+
153
# Parse the index and cross-check it against the blocks that were decoded.
#
# Checks, in order: record count, summed unpadded sizes (each rounded up to
# a multiple of four), summed uncompressed sizes, then every record against
# the block at the same position. The size checks are skipped when either
# list is empty.
#
# @param input [IO] Input stream
# @param block_count [Integer] Number of blocks decoded
# @param block_sizes [Array<Hash>] {unpadded_size, uncompressed_size} per block
# @return [Hash] Parsed index as returned by IndexDecoder.parse
# @raise [FormatError] If the index does not match the decoded blocks
def self.verify_index(input, block_count, block_sizes)
  index = IndexDecoder.parse(input)

  unless index[:count] == block_count
    raise FormatError,
          "Index count mismatch: index says #{index[:count]}, decoded #{block_count}"
  end

  return index unless block_sizes.any? && index[:records].any?

  records = index[:records]

  # Unpadded sizes are summed after rounding each up to a multiple of 4.
  padded_total = ->(entries) { entries.sum { |e| (e[:unpadded_size] + 3) & ~3 } }
  plain_total = ->(entries) { entries.sum { |e| e[:uncompressed_size] } }

  block_padded = padded_total.call(block_sizes)
  block_plain = plain_total.call(block_sizes)
  index_padded = padded_total.call(records)
  index_plain = plain_total.call(records)

  unless block_padded == index_padded
    raise FormatError,
          "Index unpadded size mismatch: blocks sum to #{block_padded}, " \
          "index says #{index_padded}"
  end

  unless block_plain == index_plain
    raise FormatError,
          "Index uncompressed size mismatch: blocks sum to #{block_plain}, " \
          "index says #{index_plain}"
  end

  # Matching totals can still hide swapped records, so compare the raw
  # (unrounded) values position by position as well.
  fields = { unpadded_size: "unpadded", uncompressed_size: "uncompressed" }
  block_sizes.zip(records).each_with_index do |(block, record), i|
    fields.each do |key, label|
      next if block[key] == record[key]

      raise FormatError,
            "Index record #{i} #{label} size mismatch: block has #{block[key]}, " \
            "index says #{record[key]}"
    end
  end

  index
end
229
+
230
# Peek at the last 12 bytes of a seekable input, parse them as a stream
# footer, and compare its backward size with the decoded index size.
#
# Does nothing when the input is not seekable or does not report a size.
# The read position is always put back where it was, even when the footer
# parser raises.
#
# NOTE(review): a footer whose check type differs from +check_type+ is
# skipped silently here, whereas read_stream_footer raises for the same
# condition - confirm this leniency is intended.
#
# @param input [IO] Input stream
# @param check_type [Symbol] Expected checksum type
# @param index_size [Integer, nil] Actual index size for backward_size validation
# @return [nil]
# @raise [FormatError] If the footer's backward size does not match index_size
def self.verify_footer_if_seekable(input, check_type, index_size = nil)
  return unless input.respond_to?(:seek) && input.respond_to?(:size) && input.size

  original_pos = input.pos
  footer =
    begin
      input.seek(-12, ::IO::SEEK_END)
      StreamFooterParser.parse(input)
    ensure
      # Restore the caller's position on success and on parse failure alike.
      input.pos = original_pos
    end

  return if footer[:check_type] != check_type
  return unless index_size

  # Stored value is (size / 4) - 1, so the real size is (stored + 1) * 4.
  real_backward_size = (footer[:backward_size] + 1) * 4
  return if real_backward_size == index_size

  raise FormatError, "Backward size mismatch: footer indicates #{real_backward_size} bytes, " \
                     "but index is #{index_size} bytes"
end
258
+
259
# Parse the stream footer at the current position and validate it.
#
# Reading it here also moves the input past the footer, which the
# trailing-data check in decode relies on.
#
# @param input [IO] Input stream positioned at the start of the stream footer
# @param check_type [Symbol] Expected checksum type
# @param index_size [Integer, nil] Actual index size; the backward-size
#   comparison is skipped when nil
# @return [Hash] Parsed footer as returned by StreamFooterParser.parse
# @raise [FormatError] If the check type or backward size does not match
def self.read_stream_footer(input, check_type, index_size = nil)
  footer = StreamFooterParser.parse(input)

  found_check = footer[:check_type]
  unless found_check == check_type
    raise FormatError,
          "Stream footer check type mismatch: expected #{check_type}, got #{found_check}"
  end

  return footer unless index_size

  # The footer stores (index size / 4) - 1; undo that before comparing.
  declared_size = (footer[:backward_size] + 1) * 4
  unless declared_size == index_size
    raise FormatError, "Backward size mismatch: footer indicates #{declared_size} bytes, " \
                       "but index is #{index_size} bytes"
  end

  footer
end
286
+
287
# Check what follows the stream footer.
#
# Zero bytes are counted as stream padding, whose length must be a multiple
# of four. The first non-zero byte is pushed back (when the IO supports it)
# and the remaining data is handed to verify_or_reject_trailing_stream.
# Inputs that lack pos or getbyte are not checked at all.
#
# @param input [IO] Input stream positioned just after the stream footer
# @return [nil]
# @raise [FormatError] If the padding length is not a multiple of four, or
#   the trailing-stream check rejects the data
def self.verify_no_trailing_data(input)
  return unless input.respond_to?(:pos) && input.respond_to?(:getbyte)

  zero_run = 0
  while (byte = input.getbyte)
    if byte.zero?
      zero_run += 1
      next
    end

    # First non-padding byte: put it back so the header check sees it.
    input.ungetbyte(byte) if input.respond_to?(:ungetbyte)

    unless (zero_run % 4).zero?
      raise FormatError,
            "Invalid stream padding: not a multiple of 4 bytes"
    end

    verify_or_reject_trailing_stream(input)
    break
  end

  # Reached only with an aligned run, or at EOF directly after padding.
  return if (zero_run % 4).zero?

  raise FormatError,
        "Invalid stream padding at EOF: #{zero_run} bytes (not a multiple of 4)"
end
335
+
336
# Verify that data following a stream begins with the XZ header magic.
#
# Up to six bytes are peeked and then pushed back (when the IO supports
# ungetbyte), so the read position is unchanged afterwards. A following
# stream is only recognised, not decoded.
#
# Leftover data shorter than the six-byte magic is rejected too: the caller
# only gets here after seeing a non-zero byte, so a short tail is either
# garbage or a truncated stream header, never a clean end of file.
#
# @param input [IO] Input stream positioned at a potential next stream
# @return [nil]
# @raise [FormatError] If the trailing data does not start with a complete
#   XZ stream header magic
def self.verify_or_reject_trailing_stream(input)
  header_magic = [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00] # XZ magic bytes

  peeked = []
  header_magic.size.times do
    byte = input.getbyte
    break if byte.nil?

    peeked << byte
  end

  # Push the bytes back in reverse so they are read again in original order.
  peeked.reverse_each { |b| input.ungetbyte(b) } if input.respond_to?(:ungetbyte)

  # Nothing left at all: genuine end of input.
  return if peeked.empty?

  peeked.each_with_index do |byte, i|
    next if byte == header_magic[i]

    raise FormatError,
          "Trailing data: invalid stream header (byte #{i}: 0x#{byte.to_s(16)} != 0x#{header_magic[i].to_s(16)})"
  end

  if peeked.size < header_magic.size
    raise FormatError,
          "Trailing data: truncated stream header (#{peeked.size} of #{header_magic.size} magic bytes present)"
  end

  nil
end
377
+
378
# Check that the Backward Size stored in the last 12 bytes of the input
# points at an index indicator byte.
#
# The last 12 bytes are read as a stream footer. If they do not end with the
# footer magic ("YZ") the check is skipped - for example when stream padding
# follows the last stream. Otherwise the stored value is converted to a byte
# count, the expected index start is computed from +file_size+, and the byte
# at that offset must be the index indicator.
#
# The read position is restored before returning or raising.
#
# @param input [IO] Input stream (must be seekable)
# @param file_size [Integer] Total file size in bytes
# @param _index_size [Integer] Decoded index size in bytes (currently unused)
# @return [nil]
# @raise [FormatError] If backward_size points to an invalid position
def self.validate_backward_size_from_footer(input, file_size,
                                            _index_size)
  return unless input.respond_to?(:seek)
  # Anything shorter than a footer cannot be inspected (covers 0 as well).
  return if file_size.nil? || file_size < 12

  original_pos = input.pos
  begin
    input.seek(-12, ::IO::SEEK_END)
    footer = input.read(12)
    return if footer.nil? || footer.bytesize < 12

    # Compare bytes with bytes: a String never equals an Array, which
    # previously made this method return before validating anything.
    return unless footer.byteslice(10, 2).bytes == [0x59, 0x5A]

    # Footer layout: CRC32 (4) | Backward Size (4, LE) | Flags (2) | Magic (2)
    backward_size = footer.byteslice(4, 4).unpack1("V")

    # Stored value is (size / 4) - 1, so the real size is (stored + 1) * 4.
    real_backward_size = (backward_size + 1) * 4

    # Index (including its indicator byte) sits directly before the footer.
    expected_index_start = file_size - 12 - real_backward_size

    if expected_index_start.negative?
      raise FormatError,
            "Invalid backward size: #{backward_size} (#{real_backward_size} bytes) " \
            "would place index at negative position #{expected_index_start}"
    end

    input.seek(expected_index_start, ::IO::SEEK_SET)
    index_indicator = input.getbyte

    if index_indicator.nil?
      raise FormatError,
            "Invalid backward size: expected index indicator (0x00) at position #{expected_index_start}, " \
            "but reached end of file"
    end

    if index_indicator != XzConst::INDEX_INDICATOR
      raise FormatError,
            "Invalid backward size: expected index indicator (0x00) at position #{expected_index_start}, " \
            "but found 0x#{index_indicator.to_s(16).upcase}"
    end

    nil
  ensure
    input.seek(original_pos, ::IO::SEEK_SET)
  end
end
465
+ end
466
+ end
467
+ end
468
+ end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stringio"
4
+ require_relative "../xz_impl/constants"
5
+ require_relative "../xz_impl/stream_header"
6
+ require_relative "../xz_impl/stream_footer"
7
+ require_relative "../xz_impl/block_encoder"
8
+ require_relative "../xz_impl/index_encoder"
9
+ require_relative "../../error"
10
+
11
+ module Omnizip
12
+ module Formats
13
+ module XzFormat
14
+ # XZ Stream encoder
15
+ # Orchestrates the complete XZ stream creation
16
+ # Based on XZ Utils stream_encoder.c
17
+ class StreamEncoder
18
+ include Omnizip::Formats::XzConst
19
+
20
# Set up an encoder.
#
# @param check_type [Integer] integrity check identifier; it is written to
#   the stream header and footer and passed to the block encoder
# @param dict_size [Integer] dictionary size in bytes passed to the block
#   encoder (defaults to 64 MiB)
def initialize(check_type: CHECK_CRC64, dict_size: 64 * 1024 * 1024)
  @index = IndexEncoder.new
  @dict_size = dict_size
  @check_type = check_type
end
25
+
26
# Encode data into a complete XZ stream:
# Stream Header, Block(s), Index, Stream Footer.
#
# The index is rebuilt on every call. Reusing the one created in the
# constructor would carry records from an earlier call into this stream's
# Index and Backward Size.
#
# @param input [String, IO] Input data to compress; objects responding to
#   +read+ are read in full, anything else is converted with +to_s+
# @return [String] XZ-formatted compressed data
def encode(input)
  # Each stream's index must describe only that stream's blocks.
  @index = IndexEncoder.new

  output = StringIO.new
  output.set_encoding(Encoding::BINARY)

  # Work on a binary copy so the caller's string is never mutated.
  input_data = input.respond_to?(:read) ? input.read : input.to_s
  input_data = input_data.dup.force_encoding(Encoding::BINARY)

  # 1. Stream Header
  output.write(StreamHeader.new(check_type: @check_type).encode)

  # 2. Block(s); this also adds the matching index records
  encode_blocks(input_data, output)

  # 3. Index
  output.write(@index.encode)

  # 4. Stream Footer, carrying the size of the index just written
  footer = StreamFooter.new(
    check_type: @check_type,
    backward_size: @index.size,
  )
  output.write(footer.encode)

  output.string
end
57
+
58
+ private
59
+
60
# Encode the payload as a single block, write it to +output+ and record it
# in the index.
#
# Nil or empty input produces no block at all, so the stream then consists
# of just Stream Header + Index + Stream Footer.
#
# @param data [String, nil] Binary payload to compress
# @param output [#write] Destination for the encoded block
# @return the result of adding the index record, or nil when no block is written
def encode_blocks(data, output)
  # nil must be tested first: calling empty? on nil raises NoMethodError.
  return if data.nil? || data.empty?

  # For now, encode entire data as single block
  # TODO: Support multi-block encoding for large files

  # Size fields are included in the block header for XZ Utils compatibility.
  block_encoder = BlockEncoder.new(
    check_type: @check_type,
    dict_size: @dict_size,
    include_block_sizes: true,
  )
  block = block_encoder.encode_block(data)

  # On-disk order: header, compressed data, padding, check value.
  %i[header data padding check].each { |part| output.write(block[part]) }

  @index.add_record(
    block_encoder.unpadded_size,
    block_encoder.uncompressed_size,
  )
end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+ require "zlib"
5
+ require_relative "../../error"
6
+
7
+ module Omnizip
8
+ module Formats
9
+ module XzFormat
10
+ # XZ Stream Footer encoder
11
+ # Based on XZ Utils stream_flags_encoder.c
12
+ class StreamFooter
13
+ include Omnizip::Formats::XzConst
14
+
15
+ attr_reader :check_type, :backward_size
16
+
17
# Build a footer description.
#
# @param backward_size [Integer] Index size in bytes; encode rejects values
#   outside the allowed range or not divisible by four
# @param check_type [Integer] check identifier written into the stream flags
def initialize(backward_size:, check_type: CHECK_CRC64)
  @backward_size = backward_size
  @check_type = check_type
end
21
+
22
# Serialize the stream footer (12 bytes total).
#
# Layout:
# - CRC32 (4 bytes, little-endian) over the next two fields
# - Backward Size (4 bytes, little-endian), stored as (bytes / 4) - 1
# - Stream Flags (2 bytes)
# - Footer Magic (2 bytes)
#
# @return [String] binary footer
# @raise [ArgumentError] if the backward size fails validation
def encode
  raise ArgumentError, "Invalid backward size: #{@backward_size}" unless valid_backward_size?

  stored_size = [(@backward_size / 4) - 1].pack("V")
  flags = encode_stream_flags

  # The checksum covers exactly the backward size followed by the flags.
  checksum = [Zlib.crc32(stored_size + flags)].pack("V")

  String.new(encoding: Encoding::BINARY) <<
    checksum << stored_size << flags << FOOTER_MAGIC.pack("C*")
end
61
+
62
+ private
63
+
64
# Build the two-byte Stream Flags field.
#
# Byte 0 is reserved and written as 0x00; byte 1 carries the check type.
#
# @return [String] two-byte binary string
def encode_stream_flags
  [0x00, @check_type].pack("CC")
end
73
+
74
# Whether the backward size lies within
# BACKWARD_SIZE_MIN..BACKWARD_SIZE_MAX and is a multiple of four.
#
# @return [Boolean]
def valid_backward_size?
  size = @backward_size
  return false unless size.between?(BACKWARD_SIZE_MIN, BACKWARD_SIZE_MAX)

  (size % 4).zero?
end
78
+ end
79
+ end
80
+ end
81
+ end