omnizip 0.3.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,421 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require "zlib"
24
+ require "stringio"
25
+ require_relative "../../algorithms/lzma2"
26
+ require_relative "../../checksums/crc64"
27
+ require_relative "../../error"
28
+ require_relative "constants"
29
+ require_relative "block_encoder"
30
+
31
+ module Omnizip
32
+ module Formats
33
+ class Xz
34
+ # XZ format writer
35
+ #
36
+ # Creates .xz files compatible with XZ Utils.
37
+ # Structure: Stream Header + Block(s) + Index + Stream Footer
38
+ #
39
+ # Based on: xz/src/liblzma/common/stream_encoder.c
40
+ class Writer
41
+ include XzConst
42
+
43
# Magic byte sequences of an XZ stream, kept as frozen arrays of byte
# values: six bytes for the Stream Header and two for the Stream Footer.
HEADER_MAGIC = "\xFD7zXZ\x00".bytes.freeze
FOOTER_MAGIC = "YZ".bytes.freeze
46
+
47
# Write a complete XZ stream to a file.
#
# The file is opened in binary write mode and a writer is built on top of
# it. The optional block is called with that writer, after which the
# writer is finalized while the file is still open.
#
# @param filename [String] Output filename
# @param options [Hash] Encoding options handed to the writer
# @yield [writer] Receives the writer instance when a block is given
# @return [Object] The value returned by the writer's #finalize
def self.create(filename, options = {})
  File.open(filename, "wb") do |io|
    new(io, options).tap { |xz| yield xz if block_given? }.finalize
  end
end
59
+
60
# Set up writer state and immediately emit the stream header.
#
# @param output [IO] Destination stream; it is written to right away
# @param options [Hash] Encoding options, read later when blocks are added
def initialize(output, options = {})
  @blocks = []
  @options = options
  @output = output

  write_stream_header
end
71
+
72
# Compress +data+ into one block, record it for the index, and write it.
#
# A fresh XzFormat::BlockEncoder is created for every call. The check
# type falls back to CHECK_CRC64 and the dictionary size to 64 MiB when
# the options do not provide them; block size fields are always requested.
#
# NOTE(review): the record kept in @blocks holds both the compressed and
# the original payload until the writer is discarded, although the index
# only reads the two size fields - confirm whether this is intended.
#
# @param data [String] Data to compress and add
def add_data(data)
  encoder_settings = {
    check_type: @options[:check_type] || CHECK_CRC64,
    dict_size: @options[:dict_size] || (64 * 1024 * 1024),
    include_block_sizes: true,
  }
  encoder = XzFormat::BlockEncoder.new(**encoder_settings)
  encoded = encoder.encode_block(data)

  # Remember what the index needs (plus the payloads) for this block.
  record = {
    compressed: encoded[:data],
    actual_compressed_size: encoded[:compressed_size],
    uncompressed_data: data,
    uncompressed_size: encoded[:uncompressed_size],
    unpadded_size: encoder.unpadded_size,
  }
  @blocks << record

  write_block_from_encoder(encoded)
end
98
+
99
# Emit an already encoded block to the output.
#
# The parts are written in this order: :header, :data, :padding, and
# finally the check value via #write_check_from_block.
#
# @param block [Hash] Block info from BlockEncoder
def write_block_from_encoder(block)
  %i[header data padding].each do |part|
    @output.write(block[part])
  end

  write_check_from_block(block)
end
115
+
116
# Write the block's precomputed check value to the output.
#
# @param block [Hash] Block info from BlockEncoder; only :check is read
def write_check_from_block(block)
  check_bytes = block[:check]
  @output.write(check_bytes)
end
122
+
123
# Finalize the XZ stream by writing the Index and the Stream Footer.
#
# Safe to call more than once: only the first call writes anything.
# Without this guard, a caller that finalizes inside the block passed to
# .create (which finalizes again afterwards) would append a second Index
# and Stream Footer and corrupt the stream.
#
# @return [Object, nil] Result of writing the footer on the first call,
#   nil on any later call
def finalize
  return if @finalized

  write_index
  footer_result = write_stream_footer
  @finalized = true
  footer_result
end
128
+
129
+ private
130
+
131
# Encode a VLI (variable-length integer).
#
# Seven payload bits are emitted per byte, least-significant group first;
# the high bit of a byte is set when more bytes follow.
#
# The value must be a non-negative Integer of at most 63 bits, so the
# result is never longer than nine bytes. Negative input is rejected
# because an arithmetic right shift never brings it to zero, which would
# make the encoding loop run forever.
#
# NOTE: this is a singleton method, so the `private` keyword preceding it
# in the class does not apply to it; instance methods call it through
# `self.class.encode_vli`.
#
# @param value [Integer] Value to encode (0..2**63-1)
# @return [String] Encoded bytes (low bits first)
# @raise [ArgumentError] If value is not an Integer in the valid range
def self.encode_vli(value)
  unless value.is_a?(Integer) && value.between?(0, (1 << 63) - 1)
    raise ArgumentError,
          "VLI value must be an Integer in 0..2**63-1, got #{value.inspect}"
  end

  bytes = []
  loop do
    # Take the low 7 bits and shift them out of the value.
    byte = value & 0x7F
    value >>= 7
    # Set the continuation bit if there is more data.
    byte |= 0x80 unless value.zero?
    bytes << byte
    break if value.zero?
  end

  bytes.pack("C*")
end
150
+
151
# Write the 12-byte stream header: 6 magic bytes, 2 stream-flag bytes and
# the little-endian CRC32 of those two flag bytes.
#
# NOTE(review): the flags are fixed to 0x00 0x04 (CRC64), whereas
# #add_data passes options[:check_type] to the block encoder - confirm
# that other check types are not meant to be supported here.
def write_stream_header
  magic = HEADER_MAGIC.pack("C*")
  stream_flags = [0x00, 0x04].pack("C*")
  flags_crc = [Zlib.crc32(stream_flags)].pack("V")

  @output.write(magic)
  @output.write(stream_flags)
  @output.write(flags_crc)
end
164
+
165
# Encode +data+ as an LZMA2 stream and report its size.
#
# The encoder is created with compression disabled
# (allow_compression: false, use_xz_encoder: false). According to the
# original notes this is done for compatibility, because xz could not
# decode the compressed output. Defaults when options are absent: 8 MiB
# dictionary, lc=3, lp=0, pb=2.
#
# The encoder output is returned unchanged; per the original notes it is
# expected to already contain the LZMA2 end marker, and the reported size
# is the byte size of that full stream.
#
# @param data [String] Input data
# @return [Array<String, Integer>] LZMA2 stream and its size in bytes
def encode_lzma2_block(data)
  settings = {
    dict_size: @options[:dict_size] || (1 << 23),
    lc: @options[:lc] || 3,
    lp: @options[:lp] || 0,
    pb: @options[:pb] || 2,
    allow_compression: false,
    use_xz_encoder: false,
  }

  stream = Omnizip::Algorithms::LZMA2Encoder.new(**settings).encode(data)

  [stream, stream.bytesize]
end
202
+
203
# Compute a block's unpadded size: block header size + compressed size +
# 8 bytes for the check.
#
# The header size is derived by rebuilding the header fields (flags 0xC0,
# both size VLIs, LZMA2 filter id 0x21, one property byte), adding one
# size byte and four CRC bytes, and rounding up to a multiple of four.
# Padding written after the compressed data is not counted.
#
# @param compressed [String] Compressed data; only its byte size is used,
#   and only when +actual_compressed_size+ is nil
# @param uncompressed_size [Integer] Uncompressed data size
# @param actual_compressed_size [Integer, nil] Compressed size to use
#   in preference to +compressed.bytesize+
# @return [Integer] Unpadded size
def calculate_unpadded_size(compressed, uncompressed_size,
                            actual_compressed_size = nil)
  payload_size = actual_compressed_size || compressed.bytesize

  fields = [0xC0].pack("C")
  fields += self.class.encode_vli(payload_size)
  fields += self.class.encode_vli(uncompressed_size)

  dict_props = Omnizip::Algorithms::LZMA2Encoder.encode_dict_size(
    @options[:dict_size] || (1 << 23),
  )
  fields += [0x21, 0x01, dict_props].pack("C*")

  # size byte + fields + CRC32, then aligned up to four bytes
  unaligned = 1 + fields.bytesize + 4
  header_size = unaligned + (-unaligned % 4)

  header_size + payload_size + 8
end
246
+
247
# Write one block: header, compressed payload, zero padding up to a
# four-byte boundary, then the check of the uncompressed data.
#
# The padding length is computed from the byte size of block[:compressed].
#
# @param block [Hash] Block info (:compressed, :uncompressed_data and the
#   keys read by #write_block_header)
def write_block(block)
  write_block_header(block)

  payload = block[:compressed]
  @output.write(payload)

  payload_size = payload.bytesize
  pad = -payload_size % 4
  @output.write("\x00" * pad) unless pad.zero?

  write_check(block[:uncompressed_data])
end
268
+
269
# Build and write a block header.
#
# Layout written to the output:
#   1. size byte: (total header size / 4) - 1
#   2. header fields: flags 0xC0, compressed-size VLI, uncompressed-size
#      VLI, filter id 0x21, property size 0x01, dictionary-size property
#   3. zero padding so the whole header is a multiple of four bytes
#   4. CRC32 (little-endian) over items 1-3
#
# The dictionary size comes from options[:dict_size], defaulting to 8 MiB.
#
# @param block [Hash] Block info; :actual_compressed_size and
#   :uncompressed_size are read
def write_block_header(block)
  fields_io = StringIO.new
  fields_io << [0xC0].pack("C")
  fields_io << self.class.encode_vli(block[:actual_compressed_size])
  fields_io << self.class.encode_vli(block[:uncompressed_size])
  fields_io << [0x21].pack("C")
  fields_io << [0x01].pack("C")

  dict_props = Omnizip::Algorithms::LZMA2Encoder.encode_dict_size(
    @options[:dict_size] || (1 << 23),
  )
  fields_io << [dict_props].pack("C")
  fields = fields_io.string

  # size byte + fields + CRC32 must add up to a multiple of four
  unaligned = 1 + fields.bytesize + 4
  pad = -unaligned % 4
  zero_pad = "\x00" * pad
  size_byte = [((unaligned + pad) / 4) - 1].pack("C")

  crc = Zlib.crc32(size_byte + fields + zero_pad)

  @output.write(size_byte)
  @output.write(fields)
  @output.write(zero_pad) unless pad.zero?
  @output.write([crc].pack("V"))
end
341
+
342
# Pad the output with zero bytes until its position is a multiple of four.
# Nothing is written when the position is already aligned.
def write_padding
  offset = @output.pos
  gap = -offset % 4
  return unless gap.positive?

  @output.write("\x00" * gap)
end
348
+
349
# Write the CRC64 of +data+ as 8 little-endian bytes.
#
# @param data [String] Data to checksum (callers pass uncompressed data)
def write_check(data)
  checksum = Omnizip::Checksums::Crc64.calculate(data)
  packed = [checksum].pack("Q<")
  @output.write(packed)
end
357
+
358
# Write the index and remember its size for the stream footer.
#
# The index consists of a 0x00 indicator byte, the record count as a VLI,
# and for every recorded block the unpadded size and uncompressed size as
# VLIs. It is followed by zero padding to a four-byte boundary and a
# little-endian CRC32 computed over the index including that padding.
#
# @backward_size is set to (index bytes + padding) / 4; the four CRC32
# bytes are not part of that figure.
def write_index
  buffer = StringIO.new
  buffer << [0x00].pack("C")
  buffer << self.class.encode_vli(@blocks.size)

  @blocks.each do |entry|
    %i[unpadded_size uncompressed_size].each do |field|
      buffer << self.class.encode_vli(entry[field])
    end
  end

  index_bytes = buffer.string
  index_size = index_bytes.bytesize

  # Alignment is based on the index size, not on the file position.
  pad = -index_size % 4
  zero_pad = "\x00" * pad

  @output.write(index_bytes)
  @output.write(zero_pad) unless pad.zero?
  @output.write([Zlib.crc32(index_bytes + zero_pad)].pack("V"))

  @backward_size = (index_size + pad) / 4
end
395
+
396
# Write the 12-byte stream footer.
#
# Order on disk: CRC32 (little-endian) of backward size + stream flags,
# the 4-byte little-endian backward size stored by #write_index, the two
# stream-flag bytes 0x00 0x04, and the 2-byte footer magic.
def write_stream_footer
  stream_flags = [0x00, 0x04].pack("C*")
  backward = [@backward_size].pack("V")
  checksum = [Zlib.crc32(backward + stream_flags)].pack("V")

  @output.write(checksum)
  @output.write(backward)
  @output.write(stream_flags)
  @output.write(FOOTER_MAGIC.pack("C*"))
end
418
+ end
419
+ end
420
+ end
421
+ end
@@ -0,0 +1,195 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+ require_relative "unix_extra_field"
5
+
6
+ module Omnizip
7
+ module Formats
8
+ module Zip
9
+ # ZIP Central Directory File Header
10
+ class CentralDirectoryHeader
11
+ include Constants
12
+
13
# Fixed-size fields, in the order they are packed by #to_binary.
attr_accessor :signature, :version_made_by, :version_needed, :flags,
              :compression_method, :last_mod_time, :last_mod_date,
              :crc32, :compressed_size, :uncompressed_size,
              :filename_length, :extra_field_length, :comment_length,
              :disk_number_start, :internal_attributes,
              :external_attributes, :local_header_offset

# Variable-length fields that follow the fixed part.
attr_accessor :filename, :extra_field, :comment

# Build a header from keyword arguments; every field has a default.
#
# Numeric fields default to 0 and string fields to "", except for the
# signature, version_made_by, version_needed and compression_method,
# which default to the corresponding constants.
def initialize(
  signature: CENTRAL_DIRECTORY_SIGNATURE,
  version_made_by: VERSION_MADE_BY_UNIX,
  version_needed: VERSION_DEFAULT,
  flags: 0,
  compression_method: COMPRESSION_STORE,
  last_mod_time: 0,
  last_mod_date: 0,
  crc32: 0,
  compressed_size: 0,
  uncompressed_size: 0,
  filename_length: 0,
  extra_field_length: 0,
  comment_length: 0,
  disk_number_start: 0,
  internal_attributes: 0,
  external_attributes: 0,
  local_header_offset: 0,
  filename: "",
  extra_field: "",
  comment: ""
)
  # Identification and versions
  @signature, @version_made_by, @version_needed =
    signature, version_made_by, version_needed

  # Flags, method and timestamp
  @flags, @compression_method = flags, compression_method
  @last_mod_time, @last_mod_date = last_mod_time, last_mod_date

  # Checksum and sizes
  @crc32, @compressed_size, @uncompressed_size =
    crc32, compressed_size, uncompressed_size

  # Lengths of the variable fields
  @filename_length, @extra_field_length, @comment_length =
    filename_length, extra_field_length, comment_length

  # Location and attributes
  @disk_number_start = disk_number_start
  @internal_attributes, @external_attributes =
    internal_attributes, external_attributes
  @local_header_offset = local_header_offset

  # Variable-length data
  @filename, @extra_field, @comment = filename, extra_field, comment
end
63
+
64
# True when the entry is a directory: either the filename ends with "/"
# or the ATTR_DIRECTORY bit is set in the external attributes.
def directory?
  return true if filename.end_with?("/")

  !external_attributes.nobits?(ATTR_DIRECTORY)
end
69
+
70
# True when any field holds its ZIP64 placeholder value: a size or the
# local header offset equal to ZIP64_LIMIT, or a disk number of 0xFFFF.
def zip64?
  wide_fields = [compressed_size, uncompressed_size, local_header_offset]
  return true if wide_fields.include?(ZIP64_LIMIT)

  disk_number_start == 0xFFFF
end
77
+
78
# True when the FLAG_ENCRYPTED bit is set in the general purpose flags.
def encrypted?
  !flags.nobits?(FLAG_ENCRYPTED)
end
82
+
83
# True when the FLAG_UTF8 bit is set in the general purpose flags.
def utf8?
  !flags.nobits?(FLAG_UTF8)
end
87
+
88
# Unix mode bits, taken from bits 16-31 of the external attributes.
#
# @return [Integer] 16-bit mode value
def unix_permissions
  (external_attributes & 0xFFFF0000) >> 16
end
92
+
93
# Store Unix mode bits in the upper half of the external attributes,
# keeping the current low 16 bits unchanged.
#
# @param perms [Integer] Unix mode value
def unix_permissions=(perms)
  low_word = external_attributes & 0xFFFF
  @external_attributes = (perms << 16) | low_word
end
97
+
98
# True when the file-type bits of the Unix mode (mask 0o170000) equal
# the symbolic link type 0o120000.
def symlink?
  file_type = unix_permissions & 0o170000
  file_type == 0o120000
end
102
+
103
# Link target stored in the Unix extra field.
#
# @return [Object, nil] nil when the entry is not a symlink or when
#   UnixExtraField.find_in_extra_field returns nil; otherwise that
#   field's link_target
def link_target
  if symlink?
    UnixExtraField.find_in_extra_field(extra_field)&.link_target
  end
end
110
+
111
# Serialize the header to its binary form.
#
# The three length fields are first refreshed from the byte sizes of
# filename, extra_field and comment (so calling this updates the object).
# The 46-byte fixed part is packed little-endian and followed by the
# binary copies of the three variable-length fields.
#
# @return [String] Binary header
def to_binary
  name_bytes = filename.b
  extra_bytes = extra_field.b
  comment_bytes = comment.b

  @filename_length = name_bytes.bytesize
  @extra_field_length = extra_bytes.bytesize
  @comment_length = comment_bytes.bytesize

  fixed_fields = [
    signature, version_made_by, version_needed, flags,
    compression_method, last_mod_time, last_mod_date,
    crc32, compressed_size, uncompressed_size,
    filename_length, extra_field_length, comment_length,
    disk_number_start, internal_attributes,
    external_attributes, local_header_offset
  ]

  # V = 32-bit LE, v = 16-bit LE; same layout as "VvvvvvvVVVvvvvvVV"
  fixed_fields.pack("Vv6V3v5V2") << name_bytes << extra_bytes << comment_bytes
end
140
+
141
# Parse a central directory header from binary data.
#
# The 46-byte fixed part is unpacked little-endian, followed by the
# filename, extra field and comment whose lengths it declares. All
# slicing is byte-based, so the result does not depend on the encoding
# tag of +data+. The filename and comment are tagged as UTF-8; the extra
# field keeps the encoding of +data+. Bytes after the declared end of
# the record are ignored.
#
# @param data [String] Bytes starting at the header signature
# @return [CentralDirectoryHeader] Parsed header
# @raise [Omnizip::FormatError] If the signature does not match or the
#   data is shorter than the record it declares
def self.from_binary(data)
  signature, version_made_by, version_needed, flags,
  compression_method, last_mod_time, last_mod_date,
  crc32, compressed_size, uncompressed_size,
  filename_length, extra_field_length, comment_length,
  disk_number_start, internal_attributes,
  external_attributes, local_header_offset = data.unpack("VvvvvvvVVVvvvvvVV")

  unless signature == CENTRAL_DIRECTORY_SIGNATURE
    raise Omnizip::FormatError,
          "Invalid central directory signature"
  end

  # Size of the fixed part; a shorter buffer leaves some fields nil.
  fixed_size = 46
  if data.bytesize < fixed_size
    raise Omnizip::FormatError,
          "Truncated central directory header " \
          "(#{data.bytesize} of #{fixed_size} bytes)"
  end

  record_size = fixed_size + filename_length + extra_field_length +
                comment_length
  if data.bytesize < record_size
    raise Omnizip::FormatError,
          "Truncated central directory header " \
          "(#{data.bytesize} of #{record_size} bytes)"
  end

  # byteslice (not []) so multibyte characters cannot shift the offsets
  offset = fixed_size
  filename = data.byteslice(offset, filename_length).force_encoding("UTF-8")
  offset += filename_length

  extra_field = data.byteslice(offset, extra_field_length)
  offset += extra_field_length

  comment = data.byteslice(offset, comment_length).force_encoding("UTF-8")

  new(
    signature: signature,
    version_made_by: version_made_by,
    version_needed: version_needed,
    flags: flags,
    compression_method: compression_method,
    last_mod_time: last_mod_time,
    last_mod_date: last_mod_date,
    crc32: crc32,
    compressed_size: compressed_size,
    uncompressed_size: uncompressed_size,
    filename_length: filename_length,
    extra_field_length: extra_field_length,
    comment_length: comment_length,
    disk_number_start: disk_number_start,
    internal_attributes: internal_attributes,
    external_attributes: external_attributes,
    local_header_offset: local_header_offset,
    filename: filename,
    extra_field: extra_field,
    comment: comment,
  )
end
187
+
188
# Size of the header in bytes: the 46-byte fixed part plus the three
# stored length fields.
#
# NOTE(review): this reads the stored *_length fields, which #to_binary
# refreshes from the actual strings - confirm callers do not rely on it
# before the lengths are in sync.
def header_size
  [filename_length, extra_field_length, comment_length].sum(46)
end
192
+ end
193
+ end
194
+ end
195
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Formats
5
+ module Zip
6
# Numeric constants of the ZIP format: record signatures, compression
# method ids, flag bits, version numbers, attribute bits and limits.
module Constants
  # --- Record signatures -------------------------------------------
  LOCAL_FILE_HEADER_SIGNATURE = 0x0403_4b50
  CENTRAL_DIRECTORY_SIGNATURE = 0x0201_4b50
  END_OF_CENTRAL_DIRECTORY_SIGNATURE = 0x0605_4b50
  ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE = 0x0606_4b50
  ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE = 0x0706_4b50
  DATA_DESCRIPTOR_SIGNATURE = 0x0807_4b50

  # --- Compression method ids --------------------------------------
  # No compression
  COMPRESSION_STORE = 0
  # Shrunk
  COMPRESSION_SHRUNK = 1
  # Reduced with compression factor 1..4
  COMPRESSION_REDUCED_1 = 2
  COMPRESSION_REDUCED_2 = 3
  COMPRESSION_REDUCED_3 = 4
  COMPRESSION_REDUCED_4 = 5
  # Imploded
  COMPRESSION_IMPLODED = 6
  # Deflated
  COMPRESSION_DEFLATE = 8
  # Enhanced Deflating
  COMPRESSION_DEFLATE64 = 9
  # BZIP2
  COMPRESSION_BZIP2 = 12
  # LZMA
  COMPRESSION_LZMA = 14
  # Zstandard
  COMPRESSION_ZSTANDARD = 93
  # PPMd version I, Rev 1
  COMPRESSION_PPMD = 98

  # --- General purpose bit flags -----------------------------------
  FLAG_ENCRYPTED = 1 << 0
  FLAG_DATA_DESCRIPTOR = 1 << 3
  FLAG_STRONG_ENCRYPTION = 1 << 6
  FLAG_UTF8 = 1 << 11

  # --- ZIP64 extended information extra field tag ------------------
  ZIP64_EXTRA_FIELD_TAG = 0x0001

  # --- Version needed to extract (major * 10 + minor) --------------
  # 2.0 - Default
  VERSION_DEFAULT = 20
  # 2.0 - Deflate
  VERSION_DEFLATE = 20
  # 4.5 - ZIP64
  VERSION_ZIP64 = 45
  # 4.6 - BZIP2
  VERSION_BZIP2 = 46
  # 6.3 - LZMA
  VERSION_LZMA = 63

  # --- "Made by" values (host id in the high byte) ------------------
  VERSION_MADE_BY_UNIX = 0x0300
  VERSION_MADE_BY_WINDOWS = 0x0000

  # --- External file attribute bits ---------------------------------
  ATTR_DIRECTORY = 1 << 4
  ATTR_ARCHIVE = 1 << 5

  # --- Unix permissions, pre-shifted into the upper 16 bits ---------
  UNIX_DIR_PERMISSIONS = 0o755 << 16
  UNIX_FILE_PERMISSIONS = 0o644 << 16
  UNIX_SYMLINK_PERMISSIONS = 0o120777 << 16

  # --- Unix extra field tag (Info-ZIP) ------------------------------
  UNIX_EXTRA_FIELD_TAG = 0x7875

  # --- Size limits --------------------------------------------------
  ZIP64_LIMIT = (1 << 32) - 1
  MAX_COMMENT_LENGTH = (1 << 16) - 1
end
67
+ end
68
+ end
69
+ end