omnizip 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (511) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +32 -0
  4. data/.rubocop_todo.yml +754 -0
  5. data/COPYING +502 -0
  6. data/Gemfile +17 -0
  7. data/LICENSE +12 -0
  8. data/README.adoc +1045 -0
  9. data/Rakefile +12 -0
  10. data/benchmark/README.md +260 -0
  11. data/benchmark/benchmark_suite.rb +125 -0
  12. data/benchmark/compression_bench.rb +181 -0
  13. data/benchmark/filter_bench.rb +180 -0
  14. data/benchmark/models/benchmark_result.rb +59 -0
  15. data/benchmark/models/comparison_result.rb +69 -0
  16. data/benchmark/profile_suite.rb +167 -0
  17. data/benchmark/reporter.rb +150 -0
  18. data/benchmark/run_benchmarks.rb +66 -0
  19. data/benchmark/test_data.rb +137 -0
  20. data/config/formats/rar3_spec.yml +91 -0
  21. data/config/formats/rar5_spec.yml +102 -0
  22. data/docs/.github/workflows/docs.yml +142 -0
  23. data/docs/.gitignore +21 -0
  24. data/docs/.lychee.toml +67 -0
  25. data/docs/Gemfile +13 -0
  26. data/docs/RAR_WRITE_SUPPORT.md +26 -0
  27. data/docs/README.md +101 -0
  28. data/docs/_config.yml +112 -0
  29. data/docs/assets/logo.svg +1 -0
  30. data/docs/assets/omnizip-logo.pdf +1540 -11
  31. data/docs/comparison/feature-matrix.adoc +694 -0
  32. data/docs/comparison/index.adoc +113 -0
  33. data/docs/comparison/vs-7zip.adoc +309 -0
  34. data/docs/comparison/vs-peazip.adoc +77 -0
  35. data/docs/comparison/vs-rubyzip.adoc +342 -0
  36. data/docs/comparison/vs-winrar.adoc +100 -0
  37. data/docs/compatibility.adoc +579 -0
  38. data/docs/concepts/index.adoc +129 -0
  39. data/docs/developer/architecture.adoc +256 -0
  40. data/docs/developer/contributing.adoc +158 -0
  41. data/docs/developer/index.adoc +25 -0
  42. data/docs/developer/testing.adoc +212 -0
  43. data/docs/getting-started/basic-usage.adoc +271 -0
  44. data/docs/getting-started/index.adoc +42 -0
  45. data/docs/getting-started/installation.adoc +138 -0
  46. data/docs/getting-started/quick-start.adoc +185 -0
  47. data/docs/getting-started/your-first-archive.adoc +218 -0
  48. data/docs/guides/advanced-features/encryption.adoc +300 -0
  49. data/docs/guides/advanced-features/index.adoc +49 -0
  50. data/docs/guides/advanced-features/parallel-processing.adoc +246 -0
  51. data/docs/guides/advanced-features/progress-tracking.adoc +320 -0
  52. data/docs/guides/advanced-features/streaming.adoc +212 -0
  53. data/docs/guides/archive-formats/gzip-format.adoc +107 -0
  54. data/docs/guides/archive-formats/index.adoc +130 -0
  55. data/docs/guides/archive-formats/rar-format.adoc +104 -0
  56. data/docs/guides/archive-formats/rar5.adoc +521 -0
  57. data/docs/guides/archive-formats/seven-zip-format.adoc +35 -0
  58. data/docs/guides/archive-formats/tar-format.adoc +106 -0
  59. data/docs/guides/archive-formats/xz-format.adoc +118 -0
  60. data/docs/guides/archive-formats/zip-format.adoc +35 -0
  61. data/docs/guides/compression-algorithms/bzip2.adoc +113 -0
  62. data/docs/guides/compression-algorithms/deflate.adoc +319 -0
  63. data/docs/guides/compression-algorithms/index.adoc +190 -0
  64. data/docs/guides/compression-algorithms/lzma.adoc +398 -0
  65. data/docs/guides/compression-algorithms/lzma2.adoc +327 -0
  66. data/docs/guides/compression-algorithms/ppmd.adoc +316 -0
  67. data/docs/guides/compression-algorithms/zstandard.adoc +361 -0
  68. data/docs/guides/creating-archives.adoc +354 -0
  69. data/docs/guides/extracting-archives.adoc +53 -0
  70. data/docs/guides/format-conversion.adoc +64 -0
  71. data/docs/guides/index.adoc +49 -0
  72. data/docs/guides/migration-rubyzip.adoc +217 -0
  73. data/docs/guides/parity-archives.adoc +605 -0
  74. data/docs/guides/performance-tuning.adoc +88 -0
  75. data/docs/index.adoc +218 -0
  76. data/docs/lychee.toml +67 -0
  77. data/docs/reference/api/overview.adoc +188 -0
  78. data/docs/reference/cli/compress-command.adoc +114 -0
  79. data/docs/reference/cli/overview.adoc +140 -0
  80. data/docs/reference/index.adoc +26 -0
  81. data/docs/resources/faq.adoc +185 -0
  82. data/docs/resources/quick-reference.adoc +222 -0
  83. data/docs/troubleshooting/index.adoc +208 -0
  84. data/examples/api_comparison.rb +205 -0
  85. data/examples/deflate64_example.rb +96 -0
  86. data/examples/par2_demo.rb +121 -0
  87. data/examples/quick_start_native.rb +150 -0
  88. data/examples/quick_start_rubyzip.rb +115 -0
  89. data/examples/rubyzip_compatibility_demo.rb +194 -0
  90. data/exe/omnizip +27 -0
  91. data/lib/omnizip/algorithm.rb +130 -0
  92. data/lib/omnizip/algorithm_registry.rb +86 -0
  93. data/lib/omnizip/algorithms/.keep +0 -0
  94. data/lib/omnizip/algorithms/bzip2/bwt.rb +225 -0
  95. data/lib/omnizip/algorithms/bzip2/decoder.rb +193 -0
  96. data/lib/omnizip/algorithms/bzip2/encoder.rb +237 -0
  97. data/lib/omnizip/algorithms/bzip2/huffman.rb +206 -0
  98. data/lib/omnizip/algorithms/bzip2/mtf.rb +101 -0
  99. data/lib/omnizip/algorithms/bzip2/rle.rb +151 -0
  100. data/lib/omnizip/algorithms/bzip2.rb +130 -0
  101. data/lib/omnizip/algorithms/deflate/constants.rb +28 -0
  102. data/lib/omnizip/algorithms/deflate/decoder.rb +38 -0
  103. data/lib/omnizip/algorithms/deflate/encoder.rb +46 -0
  104. data/lib/omnizip/algorithms/deflate.rb +128 -0
  105. data/lib/omnizip/algorithms/deflate64/constants.rb +45 -0
  106. data/lib/omnizip/algorithms/deflate64/decoder.rb +153 -0
  107. data/lib/omnizip/algorithms/deflate64/encoder.rb +98 -0
  108. data/lib/omnizip/algorithms/deflate64/huffman_coder.rb +354 -0
  109. data/lib/omnizip/algorithms/deflate64/lz77_encoder.rb +142 -0
  110. data/lib/omnizip/algorithms/deflate64.rb +109 -0
  111. data/lib/omnizip/algorithms/lzma/bit_model.rb +120 -0
  112. data/lib/omnizip/algorithms/lzma/constants.rb +112 -0
  113. data/lib/omnizip/algorithms/lzma/decoder.rb +148 -0
  114. data/lib/omnizip/algorithms/lzma/dictionary.rb +69 -0
  115. data/lib/omnizip/algorithms/lzma/distance_coder.rb +415 -0
  116. data/lib/omnizip/algorithms/lzma/encoder.rb +142 -0
  117. data/lib/omnizip/algorithms/lzma/length_coder.rb +260 -0
  118. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +320 -0
  119. data/lib/omnizip/algorithms/lzma/literal_encoder.rb +210 -0
  120. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +341 -0
  121. data/lib/omnizip/algorithms/lzma/lzma_alone_decoder.rb +192 -0
  122. data/lib/omnizip/algorithms/lzma/lzma_state.rb +128 -0
  123. data/lib/omnizip/algorithms/lzma/match.rb +32 -0
  124. data/lib/omnizip/algorithms/lzma/match_finder.rb +205 -0
  125. data/lib/omnizip/algorithms/lzma/match_finder_config.rb +142 -0
  126. data/lib/omnizip/algorithms/lzma/match_finder_factory.rb +88 -0
  127. data/lib/omnizip/algorithms/lzma/optimal_encoder.rb +130 -0
  128. data/lib/omnizip/algorithms/lzma/probability_models.rb +72 -0
  129. data/lib/omnizip/algorithms/lzma/range_coder.rb +85 -0
  130. data/lib/omnizip/algorithms/lzma/range_decoder.rb +434 -0
  131. data/lib/omnizip/algorithms/lzma/range_encoder.rb +194 -0
  132. data/lib/omnizip/algorithms/lzma/state.rb +127 -0
  133. data/lib/omnizip/algorithms/lzma/xz_buffered_range_encoder.rb +325 -0
  134. data/lib/omnizip/algorithms/lzma/xz_encoder.rb +426 -0
  135. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +645 -0
  136. data/lib/omnizip/algorithms/lzma/xz_match_finder_adapter.rb +227 -0
  137. data/lib/omnizip/algorithms/lzma/xz_price_calculator.rb +169 -0
  138. data/lib/omnizip/algorithms/lzma/xz_probability_models.rb +261 -0
  139. data/lib/omnizip/algorithms/lzma/xz_range_encoder.rb +223 -0
  140. data/lib/omnizip/algorithms/lzma/xz_range_encoder_exact.rb +331 -0
  141. data/lib/omnizip/algorithms/lzma/xz_state.rb +116 -0
  142. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +2055 -0
  143. data/lib/omnizip/algorithms/lzma.rb +238 -0
  144. data/lib/omnizip/algorithms/lzma2/chunk_manager.rb +182 -0
  145. data/lib/omnizip/algorithms/lzma2/constants.rb +41 -0
  146. data/lib/omnizip/algorithms/lzma2/encoder.rb +147 -0
  147. data/lib/omnizip/algorithms/lzma2/lzma2_chunk.rb +161 -0
  148. data/lib/omnizip/algorithms/lzma2/properties.rb +179 -0
  149. data/lib/omnizip/algorithms/lzma2/simple_lzma2_encoder.rb +127 -0
  150. data/lib/omnizip/algorithms/lzma2/xz_encoder_adapter.rb +85 -0
  151. data/lib/omnizip/algorithms/lzma2.rb +141 -0
  152. data/lib/omnizip/algorithms/ppmd7/constants.rb +74 -0
  153. data/lib/omnizip/algorithms/ppmd7/context.rb +154 -0
  154. data/lib/omnizip/algorithms/ppmd7/decoder.rb +126 -0
  155. data/lib/omnizip/algorithms/ppmd7/encoder.rb +163 -0
  156. data/lib/omnizip/algorithms/ppmd7/model.rb +248 -0
  157. data/lib/omnizip/algorithms/ppmd7/symbol_state.rb +57 -0
  158. data/lib/omnizip/algorithms/ppmd7.rb +116 -0
  159. data/lib/omnizip/algorithms/ppmd8/constants.rb +61 -0
  160. data/lib/omnizip/algorithms/ppmd8/context.rb +34 -0
  161. data/lib/omnizip/algorithms/ppmd8/decoder.rb +107 -0
  162. data/lib/omnizip/algorithms/ppmd8/encoder.rb +138 -0
  163. data/lib/omnizip/algorithms/ppmd8/model.rb +250 -0
  164. data/lib/omnizip/algorithms/ppmd8/restoration_method.rb +78 -0
  165. data/lib/omnizip/algorithms/ppmd8.rb +82 -0
  166. data/lib/omnizip/algorithms/ppmd_base.rb +138 -0
  167. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +123 -0
  168. data/lib/omnizip/algorithms/xz_lzma2.rb +118 -0
  169. data/lib/omnizip/algorithms/zstandard/constants.rb +25 -0
  170. data/lib/omnizip/algorithms/zstandard/decoder.rb +46 -0
  171. data/lib/omnizip/algorithms/zstandard/encoder.rb +51 -0
  172. data/lib/omnizip/algorithms/zstandard.rb +138 -0
  173. data/lib/omnizip/buffer/memory_archive.rb +251 -0
  174. data/lib/omnizip/buffer/memory_extractor.rb +224 -0
  175. data/lib/omnizip/buffer.rb +176 -0
  176. data/lib/omnizip/checksum_registry.rb +114 -0
  177. data/lib/omnizip/checksums/crc32.rb +100 -0
  178. data/lib/omnizip/checksums/crc64.rb +101 -0
  179. data/lib/omnizip/checksums/crc_base.rb +158 -0
  180. data/lib/omnizip/checksums/verifier.rb +131 -0
  181. data/lib/omnizip/chunked/memory_manager.rb +194 -0
  182. data/lib/omnizip/chunked/reader.rb +78 -0
  183. data/lib/omnizip/chunked/writer.rb +120 -0
  184. data/lib/omnizip/chunked.rb +129 -0
  185. data/lib/omnizip/cli/output_formatter.rb +104 -0
  186. data/lib/omnizip/cli.rb +572 -0
  187. data/lib/omnizip/commands/.keep +0 -0
  188. data/lib/omnizip/commands/archive_create_command.rb +427 -0
  189. data/lib/omnizip/commands/archive_extract_command.rb +272 -0
  190. data/lib/omnizip/commands/archive_list_command.rb +218 -0
  191. data/lib/omnizip/commands/archive_repair_command.rb +131 -0
  192. data/lib/omnizip/commands/archive_verify_command.rb +117 -0
  193. data/lib/omnizip/commands/compress_command.rb +117 -0
  194. data/lib/omnizip/commands/decompress_command.rb +120 -0
  195. data/lib/omnizip/commands/list_command.rb +53 -0
  196. data/lib/omnizip/commands/metadata_command.rb +153 -0
  197. data/lib/omnizip/commands/parity_create_command.rb +122 -0
  198. data/lib/omnizip/commands/parity_repair_command.rb +122 -0
  199. data/lib/omnizip/commands/parity_verify_command.rb +124 -0
  200. data/lib/omnizip/commands/profile_list_command.rb +56 -0
  201. data/lib/omnizip/commands/profile_show_command.rb +44 -0
  202. data/lib/omnizip/convenience.rb +359 -0
  203. data/lib/omnizip/converter/conversion_registry.rb +49 -0
  204. data/lib/omnizip/converter/conversion_strategy.rb +121 -0
  205. data/lib/omnizip/converter/seven_zip_to_zip_strategy.rb +97 -0
  206. data/lib/omnizip/converter/zip_to_seven_zip_strategy.rb +112 -0
  207. data/lib/omnizip/converter.rb +105 -0
  208. data/lib/omnizip/crypto/aes256/cipher.rb +100 -0
  209. data/lib/omnizip/crypto/aes256/constants.rb +28 -0
  210. data/lib/omnizip/crypto/aes256/key_derivation.rb +101 -0
  211. data/lib/omnizip/crypto/aes256.rb +102 -0
  212. data/lib/omnizip/error.rb +106 -0
  213. data/lib/omnizip/eta/exponential_smoothing_estimator.rb +98 -0
  214. data/lib/omnizip/eta/moving_average_estimator.rb +99 -0
  215. data/lib/omnizip/eta/rate_calculator.rb +104 -0
  216. data/lib/omnizip/eta/sample_history.rb +143 -0
  217. data/lib/omnizip/eta/time_estimator.rb +106 -0
  218. data/lib/omnizip/eta.rb +63 -0
  219. data/lib/omnizip/extraction/filter_chain.rb +177 -0
  220. data/lib/omnizip/extraction/glob_pattern.rb +140 -0
  221. data/lib/omnizip/extraction/pattern_matcher.rb +70 -0
  222. data/lib/omnizip/extraction/predicate_pattern.rb +52 -0
  223. data/lib/omnizip/extraction/regex_pattern.rb +50 -0
  224. data/lib/omnizip/extraction/selective_extractor.rb +240 -0
  225. data/lib/omnizip/extraction.rb +111 -0
  226. data/lib/omnizip/file_type/mime_classifier.rb +144 -0
  227. data/lib/omnizip/file_type.rb +113 -0
  228. data/lib/omnizip/filter.rb +139 -0
  229. data/lib/omnizip/filter_pipeline.rb +108 -0
  230. data/lib/omnizip/filter_registry.rb +166 -0
  231. data/lib/omnizip/filters/bcj.rb +279 -0
  232. data/lib/omnizip/filters/bcj2/constants.rb +53 -0
  233. data/lib/omnizip/filters/bcj2/decoder.rb +200 -0
  234. data/lib/omnizip/filters/bcj2/encoder.rb +61 -0
  235. data/lib/omnizip/filters/bcj2/stream_data.rb +93 -0
  236. data/lib/omnizip/filters/bcj2.rb +99 -0
  237. data/lib/omnizip/filters/bcj_arm.rb +176 -0
  238. data/lib/omnizip/filters/bcj_arm64.rb +244 -0
  239. data/lib/omnizip/filters/bcj_ia64.rb +196 -0
  240. data/lib/omnizip/filters/bcj_ppc.rb +190 -0
  241. data/lib/omnizip/filters/bcj_sparc.rb +176 -0
  242. data/lib/omnizip/filters/bcj_x86.rb +193 -0
  243. data/lib/omnizip/filters/delta.rb +196 -0
  244. data/lib/omnizip/filters/filter_base.rb +72 -0
  245. data/lib/omnizip/filters/registry.rb +123 -0
  246. data/lib/omnizip/filters/xz_delta.rb +258 -0
  247. data/lib/omnizip/format_detector.rb +162 -0
  248. data/lib/omnizip/format_registry.rb +59 -0
  249. data/lib/omnizip/formats/.keep +0 -0
  250. data/lib/omnizip/formats/bzip2_file.rb +172 -0
  251. data/lib/omnizip/formats/cpio/constants.rb +55 -0
  252. data/lib/omnizip/formats/cpio/entry.rb +385 -0
  253. data/lib/omnizip/formats/cpio/reader.rb +196 -0
  254. data/lib/omnizip/formats/cpio/writer.rb +234 -0
  255. data/lib/omnizip/formats/cpio.rb +140 -0
  256. data/lib/omnizip/formats/format_spec_loader.rb +230 -0
  257. data/lib/omnizip/formats/gzip.rb +238 -0
  258. data/lib/omnizip/formats/iso/directory_builder.rb +297 -0
  259. data/lib/omnizip/formats/iso/directory_record.rb +152 -0
  260. data/lib/omnizip/formats/iso/joliet.rb +204 -0
  261. data/lib/omnizip/formats/iso/path_table.rb +125 -0
  262. data/lib/omnizip/formats/iso/reader.rb +197 -0
  263. data/lib/omnizip/formats/iso/rock_ridge.rb +349 -0
  264. data/lib/omnizip/formats/iso/volume_builder.rb +320 -0
  265. data/lib/omnizip/formats/iso/volume_descriptor.rb +168 -0
  266. data/lib/omnizip/formats/iso/writer.rb +530 -0
  267. data/lib/omnizip/formats/iso.rb +140 -0
  268. data/lib/omnizip/formats/lzip.rb +175 -0
  269. data/lib/omnizip/formats/lzma_alone.rb +171 -0
  270. data/lib/omnizip/formats/rar/archive_repairer.rb +243 -0
  271. data/lib/omnizip/formats/rar/archive_verifier.rb +195 -0
  272. data/lib/omnizip/formats/rar/block_parser.rb +243 -0
  273. data/lib/omnizip/formats/rar/compression/bit_stream.rb +180 -0
  274. data/lib/omnizip/formats/rar/compression/dispatcher.rb +217 -0
  275. data/lib/omnizip/formats/rar/compression/lz77_huffman/decoder.rb +216 -0
  276. data/lib/omnizip/formats/rar/compression/lz77_huffman/encoder.rb +158 -0
  277. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_builder.rb +217 -0
  278. data/lib/omnizip/formats/rar/compression/lz77_huffman/huffman_coder.rb +189 -0
  279. data/lib/omnizip/formats/rar/compression/lz77_huffman/match_finder.rb +135 -0
  280. data/lib/omnizip/formats/rar/compression/lz77_huffman/sliding_window.rb +165 -0
  281. data/lib/omnizip/formats/rar/compression/ppmd/context.rb +105 -0
  282. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +219 -0
  283. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +262 -0
  284. data/lib/omnizip/formats/rar/compression_method_registry.rb +106 -0
  285. data/lib/omnizip/formats/rar/constants.rb +82 -0
  286. data/lib/omnizip/formats/rar/decompressor.rb +238 -0
  287. data/lib/omnizip/formats/rar/external_writer.rb +312 -0
  288. data/lib/omnizip/formats/rar/header.rb +192 -0
  289. data/lib/omnizip/formats/rar/license_validator.rb +109 -0
  290. data/lib/omnizip/formats/rar/models/rar_archive.rb +77 -0
  291. data/lib/omnizip/formats/rar/models/rar_entry.rb +65 -0
  292. data/lib/omnizip/formats/rar/models/rar_volume.rb +56 -0
  293. data/lib/omnizip/formats/rar/parity_handler.rb +292 -0
  294. data/lib/omnizip/formats/rar/rar5/compression/lzma.rb +202 -0
  295. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +578 -0
  296. data/lib/omnizip/formats/rar/rar5/compression/store.rb +60 -0
  297. data/lib/omnizip/formats/rar/rar5/crc32.rb +39 -0
  298. data/lib/omnizip/formats/rar/rar5/encryption/aes256_cbc.rb +97 -0
  299. data/lib/omnizip/formats/rar/rar5/encryption/encryption_header.rb +114 -0
  300. data/lib/omnizip/formats/rar/rar5/encryption/encryption_manager.rb +166 -0
  301. data/lib/omnizip/formats/rar/rar5/encryption/key_derivation.rb +97 -0
  302. data/lib/omnizip/formats/rar/rar5/header.rb +187 -0
  303. data/lib/omnizip/formats/rar/rar5/models/encryption_options.rb +74 -0
  304. data/lib/omnizip/formats/rar/rar5/models/recovery_options.rb +63 -0
  305. data/lib/omnizip/formats/rar/rar5/models/solid_options.rb +63 -0
  306. data/lib/omnizip/formats/rar/rar5/models/volume_options.rb +74 -0
  307. data/lib/omnizip/formats/rar/rar5/multi_volume/ARCHITECTURE.md +290 -0
  308. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_manager.rb +264 -0
  309. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_splitter.rb +155 -0
  310. data/lib/omnizip/formats/rar/rar5/multi_volume/volume_writer.rb +194 -0
  311. data/lib/omnizip/formats/rar/rar5/solid/solid_encoder.rb +109 -0
  312. data/lib/omnizip/formats/rar/rar5/solid/solid_manager.rb +142 -0
  313. data/lib/omnizip/formats/rar/rar5/solid/solid_stream.rb +121 -0
  314. data/lib/omnizip/formats/rar/rar5/vint.rb +65 -0
  315. data/lib/omnizip/formats/rar/rar5/writer.rb +466 -0
  316. data/lib/omnizip/formats/rar/rar_format_base.rb +241 -0
  317. data/lib/omnizip/formats/rar/reader.rb +366 -0
  318. data/lib/omnizip/formats/rar/recovery_record.rb +245 -0
  319. data/lib/omnizip/formats/rar/volume_manager.rb +168 -0
  320. data/lib/omnizip/formats/rar/writer.rb +431 -0
  321. data/lib/omnizip/formats/rar.rb +205 -0
  322. data/lib/omnizip/formats/rar3/compressor.rb +73 -0
  323. data/lib/omnizip/formats/rar3/decompressor.rb +66 -0
  324. data/lib/omnizip/formats/rar3/reader.rb +386 -0
  325. data/lib/omnizip/formats/rar3/writer.rb +219 -0
  326. data/lib/omnizip/formats/rar5/compressor.rb +73 -0
  327. data/lib/omnizip/formats/rar5/decompressor.rb +66 -0
  328. data/lib/omnizip/formats/rar5/reader.rb +342 -0
  329. data/lib/omnizip/formats/rar5/writer.rb +214 -0
  330. data/lib/omnizip/formats/seven_zip/coder_chain.rb +150 -0
  331. data/lib/omnizip/formats/seven_zip/constants.rb +126 -0
  332. data/lib/omnizip/formats/seven_zip/encoded_header.rb +114 -0
  333. data/lib/omnizip/formats/seven_zip/encrypted_header.rb +142 -0
  334. data/lib/omnizip/formats/seven_zip/file_collector.rb +144 -0
  335. data/lib/omnizip/formats/seven_zip/header.rb +106 -0
  336. data/lib/omnizip/formats/seven_zip/header_encryptor.rb +134 -0
  337. data/lib/omnizip/formats/seven_zip/header_writer.rb +466 -0
  338. data/lib/omnizip/formats/seven_zip/models/coder_info.rb +30 -0
  339. data/lib/omnizip/formats/seven_zip/models/file_entry.rb +58 -0
  340. data/lib/omnizip/formats/seven_zip/models/folder.rb +69 -0
  341. data/lib/omnizip/formats/seven_zip/models/stream_info.rb +42 -0
  342. data/lib/omnizip/formats/seven_zip/parser.rb +660 -0
  343. data/lib/omnizip/formats/seven_zip/reader.rb +458 -0
  344. data/lib/omnizip/formats/seven_zip/split_archive_reader.rb +632 -0
  345. data/lib/omnizip/formats/seven_zip/split_archive_writer.rb +315 -0
  346. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +151 -0
  347. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +162 -0
  348. data/lib/omnizip/formats/seven_zip/writer.rb +740 -0
  349. data/lib/omnizip/formats/seven_zip.rb +93 -0
  350. data/lib/omnizip/formats/tar/constants.rb +73 -0
  351. data/lib/omnizip/formats/tar/entry.rb +94 -0
  352. data/lib/omnizip/formats/tar/header.rb +168 -0
  353. data/lib/omnizip/formats/tar/reader.rb +121 -0
  354. data/lib/omnizip/formats/tar/writer.rb +216 -0
  355. data/lib/omnizip/formats/tar.rb +84 -0
  356. data/lib/omnizip/formats/xz/reader.rb +116 -0
  357. data/lib/omnizip/formats/xz.rb +237 -0
  358. data/lib/omnizip/formats/xz_impl/block_decoder.rb +754 -0
  359. data/lib/omnizip/formats/xz_impl/block_encoder.rb +306 -0
  360. data/lib/omnizip/formats/xz_impl/block_header.rb +210 -0
  361. data/lib/omnizip/formats/xz_impl/block_header_parser.rb +186 -0
  362. data/lib/omnizip/formats/xz_impl/constants.rb +49 -0
  363. data/lib/omnizip/formats/xz_impl/index_decoder.rb +174 -0
  364. data/lib/omnizip/formats/xz_impl/index_encoder.rb +122 -0
  365. data/lib/omnizip/formats/xz_impl/stream_decoder.rb +468 -0
  366. data/lib/omnizip/formats/xz_impl/stream_encoder.rb +99 -0
  367. data/lib/omnizip/formats/xz_impl/stream_footer.rb +81 -0
  368. data/lib/omnizip/formats/xz_impl/stream_footer_parser.rb +117 -0
  369. data/lib/omnizip/formats/xz_impl/stream_header.rb +55 -0
  370. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +108 -0
  371. data/lib/omnizip/formats/xz_impl/vli.rb +128 -0
  372. data/lib/omnizip/formats/xz_impl/writer.rb +421 -0
  373. data/lib/omnizip/formats/zip/central_directory_header.rb +195 -0
  374. data/lib/omnizip/formats/zip/constants.rb +69 -0
  375. data/lib/omnizip/formats/zip/end_of_central_directory.rb +133 -0
  376. data/lib/omnizip/formats/zip/local_file_header.rb +138 -0
  377. data/lib/omnizip/formats/zip/reader.rb +250 -0
  378. data/lib/omnizip/formats/zip/unix_extra_field.rb +153 -0
  379. data/lib/omnizip/formats/zip/writer.rb +375 -0
  380. data/lib/omnizip/formats/zip/zip64_end_of_central_directory.rb +104 -0
  381. data/lib/omnizip/formats/zip/zip64_end_of_central_directory_locator.rb +66 -0
  382. data/lib/omnizip/formats/zip/zip64_extra_field.rb +114 -0
  383. data/lib/omnizip/formats/zip.rb +50 -0
  384. data/lib/omnizip/implementations/base/lzma2_decoder_base.rb +75 -0
  385. data/lib/omnizip/implementations/base/lzma2_encoder_base.rb +128 -0
  386. data/lib/omnizip/implementations/base/lzma_decoder_base.rb +83 -0
  387. data/lib/omnizip/implementations/base/lzma_encoder_base.rb +108 -0
  388. data/lib/omnizip/implementations/base/state_machine_base.rb +182 -0
  389. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +421 -0
  390. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +465 -0
  391. data/lib/omnizip/implementations/seven_zip/lzma/match_finder.rb +288 -0
  392. data/lib/omnizip/implementations/seven_zip/lzma/range_decoder.rb +200 -0
  393. data/lib/omnizip/implementations/seven_zip/lzma/range_encoder.rb +197 -0
  394. data/lib/omnizip/implementations/seven_zip/lzma/state_machine.rb +141 -0
  395. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +519 -0
  396. data/lib/omnizip/implementations/xz_utils/lzma2/decoder.rb +723 -0
  397. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +750 -0
  398. data/lib/omnizip/io/buffered_input.rb +146 -0
  399. data/lib/omnizip/io/buffered_output.rb +105 -0
  400. data/lib/omnizip/io/stream_manager.rb +115 -0
  401. data/lib/omnizip/link_handler/hard_link.rb +79 -0
  402. data/lib/omnizip/link_handler/symbolic_link.rb +74 -0
  403. data/lib/omnizip/link_handler.rb +124 -0
  404. data/lib/omnizip/metadata/archive_metadata.rb +114 -0
  405. data/lib/omnizip/metadata/entry_metadata.rb +146 -0
  406. data/lib/omnizip/metadata/metadata_editor.rb +171 -0
  407. data/lib/omnizip/metadata/metadata_registry.rb +64 -0
  408. data/lib/omnizip/metadata/metadata_validator.rb +99 -0
  409. data/lib/omnizip/metadata.rb +57 -0
  410. data/lib/omnizip/models/.keep +0 -0
  411. data/lib/omnizip/models/algorithm_metadata.rb +73 -0
  412. data/lib/omnizip/models/compression_options.rb +71 -0
  413. data/lib/omnizip/models/conversion_options.rb +87 -0
  414. data/lib/omnizip/models/conversion_result.rb +135 -0
  415. data/lib/omnizip/models/eta_result.rb +46 -0
  416. data/lib/omnizip/models/extraction_rule.rb +115 -0
  417. data/lib/omnizip/models/filter_chain.rb +144 -0
  418. data/lib/omnizip/models/filter_config.rb +183 -0
  419. data/lib/omnizip/models/match_result.rb +124 -0
  420. data/lib/omnizip/models/optimization_suggestion.rb +91 -0
  421. data/lib/omnizip/models/parallel_options.rb +104 -0
  422. data/lib/omnizip/models/performance_result.rb +79 -0
  423. data/lib/omnizip/models/profile_report.rb +82 -0
  424. data/lib/omnizip/models/progress_options.rb +38 -0
  425. data/lib/omnizip/models/split_options.rb +116 -0
  426. data/lib/omnizip/optimization_registry.rb +81 -0
  427. data/lib/omnizip/parallel/job_queue.rb +209 -0
  428. data/lib/omnizip/parallel/job_scheduler.rb +203 -0
  429. data/lib/omnizip/parallel/parallel_compressor.rb +347 -0
  430. data/lib/omnizip/parallel/parallel_extractor.rb +329 -0
  431. data/lib/omnizip/parallel/worker_pool.rb +223 -0
  432. data/lib/omnizip/parallel.rb +149 -0
  433. data/lib/omnizip/parity/chunked_block_processor.rb +196 -0
  434. data/lib/omnizip/parity/galois16.rb +145 -0
  435. data/lib/omnizip/parity/models/creator_packet.rb +73 -0
  436. data/lib/omnizip/parity/models/file_description_packet.rb +133 -0
  437. data/lib/omnizip/parity/models/ifsc_packet.rb +123 -0
  438. data/lib/omnizip/parity/models/main_packet.rb +128 -0
  439. data/lib/omnizip/parity/models/packet.rb +156 -0
  440. data/lib/omnizip/parity/models/packet_registry.rb +109 -0
  441. data/lib/omnizip/parity/models/recovery_slice_packet.rb +78 -0
  442. data/lib/omnizip/parity/par2_creator.rb +531 -0
  443. data/lib/omnizip/parity/par2_repairer.rb +407 -0
  444. data/lib/omnizip/parity/par2_verifier.rb +364 -0
  445. data/lib/omnizip/parity/par2cmdline_algorithm.rb +110 -0
  446. data/lib/omnizip/parity/par2cmdline_coefficients.rb +78 -0
  447. data/lib/omnizip/parity/reed_solomon_decoder.rb +266 -0
  448. data/lib/omnizip/parity/reed_solomon_encoder.rb +111 -0
  449. data/lib/omnizip/parity/reed_solomon_matrix.rb +342 -0
  450. data/lib/omnizip/parity.rb +186 -0
  451. data/lib/omnizip/password/encryption_registry.rb +65 -0
  452. data/lib/omnizip/password/encryption_strategy.rb +96 -0
  453. data/lib/omnizip/password/password_validator.rb +129 -0
  454. data/lib/omnizip/password/winzip_aes_strategy.rb +192 -0
  455. data/lib/omnizip/password/zip_crypto_strategy.rb +141 -0
  456. data/lib/omnizip/password.rb +87 -0
  457. data/lib/omnizip/pipe/stream_compressor.rb +124 -0
  458. data/lib/omnizip/pipe/stream_decompressor.rb +174 -0
  459. data/lib/omnizip/pipe.rb +121 -0
  460. data/lib/omnizip/platform/ntfs_streams.rb +201 -0
  461. data/lib/omnizip/platform.rb +189 -0
  462. data/lib/omnizip/profile/archive_profile.rb +39 -0
  463. data/lib/omnizip/profile/balanced_profile.rb +33 -0
  464. data/lib/omnizip/profile/binary_profile.rb +36 -0
  465. data/lib/omnizip/profile/compression_profile.rb +158 -0
  466. data/lib/omnizip/profile/custom_profile.rb +157 -0
  467. data/lib/omnizip/profile/fast_profile.rb +33 -0
  468. data/lib/omnizip/profile/maximum_profile.rb +33 -0
  469. data/lib/omnizip/profile/profile_detector.rb +110 -0
  470. data/lib/omnizip/profile/profile_registry.rb +161 -0
  471. data/lib/omnizip/profile/text_profile.rb +36 -0
  472. data/lib/omnizip/profile.rb +190 -0
  473. data/lib/omnizip/profiler/memory_profiler.rb +66 -0
  474. data/lib/omnizip/profiler/method_profiler.rb +49 -0
  475. data/lib/omnizip/profiler/report_generator.rb +169 -0
  476. data/lib/omnizip/profiler.rb +204 -0
  477. data/lib/omnizip/progress/callback_reporter.rb +36 -0
  478. data/lib/omnizip/progress/console_reporter.rb +62 -0
  479. data/lib/omnizip/progress/log_reporter.rb +91 -0
  480. data/lib/omnizip/progress/operation_progress.rb +118 -0
  481. data/lib/omnizip/progress/progress_bar.rb +156 -0
  482. data/lib/omnizip/progress/progress_reporter.rb +40 -0
  483. data/lib/omnizip/progress/progress_tracker.rb +190 -0
  484. data/lib/omnizip/progress/silent_reporter.rb +24 -0
  485. data/lib/omnizip/progress.rb +127 -0
  486. data/lib/omnizip/rubyzip_compat.rb +63 -0
  487. data/lib/omnizip/temp/safe_extract.rb +168 -0
  488. data/lib/omnizip/temp/temp_file.rb +124 -0
  489. data/lib/omnizip/temp/temp_file_pool.rb +109 -0
  490. data/lib/omnizip/temp.rb +181 -0
  491. data/lib/omnizip/version.rb +5 -0
  492. data/lib/omnizip/zip/entry.rb +156 -0
  493. data/lib/omnizip/zip/file.rb +485 -0
  494. data/lib/omnizip/zip/input_stream.rb +273 -0
  495. data/lib/omnizip/zip/output_stream.rb +324 -0
  496. data/lib/omnizip.rb +156 -0
  497. data/readme-docs/advanced-features.adoc +515 -0
  498. data/readme-docs/api-usage.adoc +444 -0
  499. data/readme-docs/architecture.adoc +449 -0
  500. data/readme-docs/archive-formats.adoc +479 -0
  501. data/readme-docs/cli-usage.adoc +222 -0
  502. data/readme-docs/compression-algorithms.adoc +442 -0
  503. data/readme-docs/compression-profiles.adoc +247 -0
  504. data/readme-docs/encryption-checksums.adoc +328 -0
  505. data/readme-docs/format-converter.adoc +325 -0
  506. data/readme-docs/installation.adoc +228 -0
  507. data/readme-docs/par2-archives.adoc +608 -0
  508. data/readme-docs/performance-profiler.adoc +389 -0
  509. data/readme-docs/preprocessing-filters.adoc +280 -0
  510. data/xz-file-format-1.2.1.txt +1174 -0
  511. metadata +617 -0
@@ -0,0 +1,240 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "pattern_matcher"
4
+ require_relative "filter_chain"
5
+ require_relative "../models/match_result"
6
+
7
module Omnizip
  module Extraction
    # Coordinates selective extraction from archives.
    #
    # Only entries accepted by the configured filter are read and written;
    # entries rejected by the filter never have their content requested.
    #
    # The archive and its entries are duck-typed: the archive must respond to
    # +entries+ or +each+, and each entry must expose a name (+name+, +path+
    # or +filename+) and readable content (+read+, +get_input_stream+, or via
    # +archive.read(entry)+).
    class SelectiveExtractor
      attr_reader :archive, :filter

      # Initialize a new selective extractor
      #
      # @param archive [Object] Archive to extract from
      # @param filter [FilterChain, PatternMatcher, Object, nil] Filter to
      #   apply; +nil+ selects every entry, any other object is wrapped in a
      #   PatternMatcher
      def initialize(archive, filter = nil)
        @archive = archive
        @filter = normalize_filter(filter)
      end

      # Extract matching files to destination
      #
      # @param dest [String] Destination directory (created if missing)
      # @param options [Hash] Extraction options
      # @option options [Boolean] :preserve_paths Keep directory structure
      #   (default; pass +false+ to use basenames only)
      # @option options [Boolean] :flatten Extract all to root
      # @option options [Boolean] :overwrite Overwrite existing files
      # @option options [Progress::ProgressTracker] :progress Progress tracker
      # @return [Array<String>] Destination paths of all matched entries, in
      #   archive order. Paths that were skipped because the file already
      #   existed (and +:overwrite+ was not set) are included as well.
      # @raise [Error] if an entry name would resolve outside +dest+
      def extract(dest, options = {})
        FileUtils.mkdir_p(dest)
        extracted = []

        entries_to_extract = list_matches
        total = entries_to_extract.size
        current = 0

        entries_to_extract.each do |entry|
          dest_path = build_dest_path(entry, dest, options)
          extract_entry(entry, dest_path, options)
          extracted << dest_path

          current += 1
          update_progress(options[:progress], current, total, entry)
        end

        extracted
      end

      # Extract matching files to memory
      #
      # @return [Hash<String, String>] Hash of filename => content
      def extract_to_memory
        list_matches.each_with_object({}) do |entry, result|
          result[entry_filename(entry)] = read_entry_content(entry)
        end
      end

      # List matching entries without extracting
      #
      # @return [Array] Matching entries (all entries when no filter is set)
      def list_matches
        return list_all if @filter.nil?

        list_all.select { |entry| filter_matches?(entry) }
      end

      # Count matching entries
      #
      # @return [Integer]
      def count_matches
        list_matches.size
      end

      # Get match result with statistics
      #
      # @return [Models::MatchResult]
      def match_result
        all_entries = list_all
        matches = if @filter
                    all_entries.select { |entry| filter_matches?(entry) }
                  else
                    all_entries
                  end

        Models::MatchResult.new(
          @filter&.to_s || "all",
          matches: matches,
          total_scanned: all_entries.size,
        )
      end

      private

      # Normalize filter to FilterChain or PatternMatcher
      #
      # @param filter [Object] Input filter
      # @return [FilterChain, PatternMatcher, nil]
      def normalize_filter(filter)
        case filter
        when FilterChain, PatternMatcher, nil
          filter
        else
          PatternMatcher.new(filter)
        end
      end

      # List all entries in archive
      #
      # @return [Array] All entries
      # @raise [Error] if the archive exposes neither +entries+ nor +each+
      def list_all
        if @archive.respond_to?(:entries)
          @archive.entries
        elsif @archive.respond_to?(:each)
          @archive.to_a
        else
          raise Error, "Archive does not support listing entries"
        end
      end

      # Extract a single entry
      #
      # Does nothing when the destination already exists and +:overwrite+ is
      # not set.
      #
      # @param entry [Object] Entry to extract
      # @param dest_path [String] Destination path
      # @param options [Hash] Options
      def extract_entry(entry, dest_path, options)
        return if File.exist?(dest_path) && !options[:overwrite]

        FileUtils.mkdir_p(File.dirname(dest_path))
        File.binwrite(dest_path, read_entry_content(entry))
      end

      # Read content from an entry
      #
      # @param entry [Object] Entry to read
      # @return [String] Entry content
      # @raise [Error] if neither the entry nor the archive can supply content
      def read_entry_content(entry)
        if entry.respond_to?(:read)
          entry.read
        elsif entry.respond_to?(:get_input_stream)
          entry.get_input_stream.read
        elsif @archive.respond_to?(:read)
          @archive.read(entry)
        else
          raise Error, "Cannot read entry content"
        end
      end

      # Build destination path for an entry
      #
      # With +:flatten+, or with +preserve_paths: false+, only the entry's
      # basename is used; otherwise the entry's relative path is preserved.
      #
      # @param entry [Object] Entry
      # @param dest [String] Destination directory
      # @param options [Hash] Options
      # @return [String] Full destination path
      # @raise [Error] if the resulting path lies outside +dest+
      def build_dest_path(entry, dest, options)
        filename = entry_filename(entry)
        use_basename = options[:flatten] || options[:preserve_paths] == false
        relative = use_basename ? File.basename(filename) : filename

        dest_path = File.join(dest, relative)
        verify_inside_destination!(dest_path, dest, filename)
        dest_path
      end

      # Reject destination paths that escape the extraction root.
      #
      # Entry names come from the archive and must be treated as untrusted: a
      # name containing ".." segments would otherwise be joined onto +dest+
      # and written outside of it. The check is purely lexical (it does not
      # resolve symlinks that already exist on disk).
      #
      # @param dest_path [String] Candidate destination path
      # @param dest [String] Destination directory
      # @param filename [String] Entry name, used in the error message
      # @raise [Error] if +dest_path+ is not +dest+ itself or below it
      def verify_inside_destination!(dest_path, dest, filename)
        # absolute_path normalizes "." / ".." segments without expanding "~".
        root = File.absolute_path(dest)
        target = File.absolute_path(dest_path)
        prefix = root.end_with?(File::SEPARATOR) ? root : "#{root}#{File::SEPARATOR}"
        return if target == root || target.start_with?(prefix)

        raise Error, "Entry path escapes destination directory: #{filename}"
      end

      # Get filename from entry
      #
      # Tries +name+, +path+ and +filename+ in that order and falls back to
      # +to_s+.
      #
      # @param entry [Object] Entry
      # @return [String] Filename
      def entry_filename(entry)
        if entry.respond_to?(:name)
          entry.name
        elsif entry.respond_to?(:path)
          entry.path
        elsif entry.respond_to?(:filename)
          entry.filename
        else
          entry.to_s
        end
      end

      # Update progress tracker
      #
      # NOTE: the values reported through +current_bytes+ / +total_bytes+ are
      # entry counts, not byte counts.
      #
      # @param tracker [Progress::ProgressTracker, nil] Progress tracker
      # @param current [Integer] Number of entries processed so far
      # @param total [Integer] Total number of entries to process
      # @param entry [Object] Current entry
      def update_progress(tracker, current, total, entry)
        return unless tracker

        tracker.update(
          current_bytes: current,
          total_bytes: total,
          current_file: entry_filename(entry),
        )
      end

      # Check if entry matches the filter
      #
      # A FilterChain receives the entry plus its filename; every other
      # filter is asked about the filename only.
      #
      # @param entry [Object] Entry to check
      # @return [Boolean]
      def filter_matches?(entry)
        filename = entry_filename(entry)

        case @filter
        when FilterChain
          @filter.match?(entry, filename: filename)
        else
          @filter.match?(filename)
        end
      end
    end
  end
end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "extraction/pattern_matcher"
4
+ require_relative "extraction/filter_chain"
5
+ require_relative "extraction/selective_extractor"
6
+ require_relative "extraction/glob_pattern"
7
+ require_relative "extraction/regex_pattern"
8
+ require_relative "extraction/predicate_pattern"
9
+
10
module Omnizip
  # Provides selective extraction capabilities for archives
  #
  # Supports extracting files matching glob patterns, regex patterns,
  # or custom predicates without extracting the entire archive.
  #
  # Every method here builds a filter from the given pattern(s) and delegates
  # to a SelectiveExtractor.
  module Extraction
    class << self
      # Extract files matching a pattern from an archive
      #
      # @param archive [Object] Archive to extract from
      # @param pattern [String, Regexp, Array] Pattern(s) to match
      # @param dest [String] Destination directory
      # @param options [Hash] Extraction options
      # @option options [Boolean] :preserve_paths Keep directory structure
      # @option options [Boolean] :flatten Extract all to destination root
      # @option options [Boolean] :overwrite Overwrite existing files
      # @return [Array<String>] Paths of extracted files
      def extract_matching(archive, pattern, dest, options = {})
        extractor_for(archive, pattern).extract(dest, options)
      end

      # Extract files matching a pattern to memory
      #
      # @param archive [Object] Archive to extract from
      # @param pattern [String, Regexp, Array] Pattern(s) to match
      # @return [Hash<String, String>] Hash of filename => content
      def extract_to_memory_matching(archive, pattern)
        extractor_for(archive, pattern).extract_to_memory
      end

      # List files matching a pattern without extracting
      #
      # @param archive [Object] Archive to list from
      # @param pattern [String, Regexp, Array] Pattern(s) to match
      # @return [Array] Matching entries
      def list_matching(archive, pattern)
        extractor_for(archive, pattern).list_matches
      end

      # Count files matching a pattern
      #
      # @param archive [Object] Archive to count in
      # @param pattern [String, Regexp, Array] Pattern(s) to match
      # @return [Integer] Number of matches
      def count_matching(archive, pattern)
        extractor_for(archive, pattern).count_matches
      end

      # Extract with a filter chain
      #
      # The filter is handed to SelectiveExtractor unchanged.
      #
      # @param archive [Object] Archive to extract from
      # @param filter [FilterChain] Filter chain to apply
      # @param dest [String] Destination directory
      # @param options [Hash] Extraction options
      # @return [Array<String>] Paths of extracted files
      def extract_with_filter(archive, filter, dest, options = {})
        SelectiveExtractor.new(archive, filter).extract(dest, options)
      end

      # Get match result with statistics
      #
      # @param archive [Object] Archive to analyze
      # @param pattern [String, Regexp, Array] Pattern(s) to match
      # @return [Models::MatchResult] Match result with statistics
      def match_result(archive, pattern)
        extractor_for(archive, pattern).match_result
      end

      private

      # Build a SelectiveExtractor for the given archive and pattern(s)
      #
      # @param archive [Object] Archive to operate on
      # @param pattern [Object] Pattern or array of patterns
      # @return [SelectiveExtractor]
      def extractor_for(archive, pattern)
        SelectiveExtractor.new(archive, build_filter(pattern))
      end

      # Build filter from pattern(s)
      #
      # An Array is turned into a FilterChain with one include pattern per
      # element. An object that already is a FilterChain or PatternMatcher is
      # returned as-is rather than being wrapped a second time. Anything else
      # is wrapped in a PatternMatcher.
      #
      # @param pattern [Object] Pattern or array of patterns
      # @return [PatternMatcher, FilterChain]
      def build_filter(pattern)
        case pattern
        when Array
          chain = FilterChain.new
          pattern.each { |item| chain.include_pattern(item) }
          chain
        when FilterChain, PatternMatcher
          pattern
        else
          PatternMatcher.new(pattern)
        end
      end
    end
  end
end
@@ -0,0 +1,144 @@
1
+ # frozen_string_literal: true
2
+
3
module Omnizip
  module FileType
    # Centralized MIME type classification for file type detection
    #
    # This class provides methods to classify MIME types into categories
    # and determine appropriate compression profiles based on file type.
    #
    # All predicates compare the bare, lower-cased media type: parameters
    # (everything from the first ";") and surrounding whitespace are ignored,
    # so "Text/HTML; charset=UTF-8" is classified like "text/html".
    class MimeClassifier
      # Text-based MIME types
      TEXT_TYPES = %w[
        text/plain
        text/html
        text/css
        text/javascript
        text/xml
        text/csv
        text/markdown
        application/json
        application/xml
        application/javascript
        application/ecmascript
        application/x-httpd-php
        application/x-sh
        application/x-csh
        application/x-perl
        application/x-python
        application/x-ruby
        application/x-sql
        application/sql
      ].freeze

      # Archive MIME types
      ARCHIVE_TYPES = %w[
        application/zip
        application/x-7z-compressed
        application/x-rar-compressed
        application/x-tar
        application/gzip
        application/x-gzip
        application/x-bzip2
        application/x-xz
        application/x-lzip
        application/x-lzma
        application/x-compress
        application/zstd
        application/x-archive
        application/x-iso9660-image
      ].freeze

      # Executable MIME types
      EXECUTABLE_TYPES = %w[
        application/x-executable
        application/x-mach-binary
        application/x-elf
        application/x-sharedlib
        application/x-msdownload
        application/x-dosexec
        application/vnd.microsoft.portable-executable
      ].freeze

      # Binary/unknown MIME types (treated as binary data)
      BINARY_TYPES = %w[
        application/octet-stream
      ].freeze

      # Media MIME types (images, audio, video)
      MEDIA_TYPES = [
        /\Aimage\//,
        /\Aaudio\//,
        /\Avideo\//,
        "application/pdf",
      ].freeze

      class << self
        # Check if the MIME type is text-based
        #
        # True for every entry of TEXT_TYPES and for any "text/*" type.
        #
        # @param mime_type [String, nil] The MIME type to check
        # @return [Boolean] true if text-based
        def text?(mime_type)
          type = normalize(mime_type)
          return false unless type

          TEXT_TYPES.include?(type) || type.start_with?("text/")
        end

        # Check if the MIME type is an archive
        #
        # @param mime_type [String, nil] The MIME type to check
        # @return [Boolean] true if archive
        def archive?(mime_type)
          type = normalize(mime_type)
          return false unless type

          ARCHIVE_TYPES.include?(type)
        end

        # Check if the MIME type is executable
        #
        # Also true for the generic BINARY_TYPES ("application/octet-stream"),
        # which are handled the same way as executables.
        #
        # @param mime_type [String, nil] The MIME type to check
        # @return [Boolean] true if executable or generic binary
        def executable?(mime_type)
          type = normalize(mime_type)
          return false unless type

          EXECUTABLE_TYPES.include?(type) || BINARY_TYPES.include?(type)
        end

        # Check if the MIME type is media (image/audio/video)
        #
        # "application/pdf" is counted as media as well (see MEDIA_TYPES).
        #
        # @param mime_type [String, nil] The MIME type to check
        # @return [Boolean] true if media
        def media?(mime_type)
          type = normalize(mime_type)
          return false unless type

          MEDIA_TYPES.any? do |pattern|
            case pattern
            when String
              type == pattern
            when Regexp
              pattern.match?(type)
            end
          end
        end

        # Determine the recommended profile category for a MIME type
        #
        # Checks are applied in order: text, executable/binary, then
        # archive or media (both map to :archive). Anything else, including
        # nil or blank input, is :balanced.
        #
        # @param mime_type [String, nil] The MIME type to classify
        # @return [Symbol] The recommended profile category
        #   (:text, :binary, :archive, :balanced)
        def profile_category(mime_type)
          type = normalize(mime_type)
          return :balanced unless type

          if text?(type)
            :text
          elsif executable?(type)
            :binary
          elsif archive?(type) || media?(type)
            :archive
          else
            :balanced
          end
        end

        private

        # Reduce a MIME type to its comparable form.
        #
        # Media type and subtype names are case-insensitive and may be
        # followed by ";"-separated parameters, so both are stripped before
        # comparing against the lower-case tables above.
        #
        # @param mime_type [String, nil] Raw MIME type
        # @return [String, nil] Lower-cased type without parameters, or nil
        #   when the input is nil/false or blank
        def normalize(mime_type)
          return nil unless mime_type

          essence = mime_type.to_s.split(";", 2).first.to_s.strip.downcase
          essence.empty? ? nil : essence
        end
      end
    end
  end
end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
require "pathname"
require "stringio"

require "marcel"
require_relative "file_type/mime_classifier"
5
+
6
module Omnizip
  # File type detection module using Marcel for MIME type detection
  #
  # Provides MIME type detection using the Marcel library:
  # - Path-based detection (examines file extension and content)
  # - Data-based detection (analyzes binary data)
  # - Stream-based detection (reads from IO streams)
  #
  # Every method is best-effort: any StandardError raised during detection
  # is swallowed and reported as +nil+.
  #
  # @example Detect file type from path
  #   mime_type = Omnizip::FileType.detect('app.exe')
  #   # => 'application/x-executable'
  #
  # @example Detect from binary data
  #   mime_type = Omnizip::FileType.detect_data(File.binread('image.png'))
  #   # => 'image/png'
  #
  # @example Detect from IO stream with filename hint
  #   File.open('document.pdf', 'rb') do |file|
  #     mime_type = Omnizip::FileType.detect_stream(file, filename: 'document.pdf')
  #     # => 'application/pdf'
  #   end
  module FileType
    class << self
      # Detect MIME type from file path
      #
      # Hands the path to Marcel as a Pathname. Returns nil without calling
      # Marcel when the path is nil or does not exist.
      #
      # @param path [String, Pathname] File path
      # @return [String, nil] MIME type string or nil if detection fails
      #
      # @example
      #   FileType.detect('document.pdf')
      #   # => 'application/pdf'
      def detect(path)
        return nil unless path
        return nil unless File.exist?(path)

        Marcel::MimeType.for(Pathname.new(path))
      rescue StandardError
        nil
      end

      # Detect MIME type from binary data
      #
      # Uses Marcel to analyze binary data for MIME type detection.
      # Optionally accepts a filename hint for better accuracy.
      # The caller's string is not modified.
      #
      # @param data [String] Binary data
      # @param filename [String, nil] Optional filename hint
      # @return [String, nil] MIME type string, or nil when +data+ is nil or
      #   empty or detection fails
      #
      # @example Without filename hint
      #   data = File.binread('image.png')
      #   FileType.detect_data(data)
      #   # => 'image/png'
      #
      # @example With filename hint
      #   FileType.detect_data(data, filename: 'image.png')
      #   # => 'image/png'
      def detect_data(data, filename: nil)
        return nil unless data
        return nil if data.empty?

        # Wrap a binary-encoded copy (String#b) instead of calling
        # set_encoding on a StringIO around +data+ itself, so the caller's
        # string can never be re-tagged and frozen strings are accepted.
        Marcel::MimeType.for(StringIO.new(data.b), name: filename)
      rescue StandardError
        nil
      end

      # Detect MIME type from IO stream
      #
      # Uses Marcel to analyze an IO stream for MIME type detection.
      # Optionally accepts a filename hint for better accuracy.
      # The stream position is restored afterwards on a best-effort basis,
      # whether detection succeeds or fails; a stream that cannot report or
      # restore its position is left wherever detection stopped.
      #
      # @param io [IO] IO stream
      # @param filename [String, nil] Optional filename hint
      # @return [String, nil] MIME type string or nil if detection fails
      #
      # @example
      #   File.open('document.pdf', 'rb') do |file|
      #     FileType.detect_stream(file, filename: 'document.pdf')
      #     # => 'application/pdf'
      #   end
      def detect_stream(io, filename: nil)
        return nil unless io

        original_pos = current_position(io)

        begin
          Marcel::MimeType.for(io, name: filename)
        rescue StandardError
          nil
        ensure
          restore_position(io, original_pos)
        end
      end

      private

      # Read the stream position, or nil when the stream has none.
      #
      # @param io [Object] Stream to query
      # @return [Integer, nil] Current position, or nil if +io+ does not
      #   respond to +pos+ or querying it raises
      def current_position(io)
        io.pos if io.respond_to?(:pos)
      rescue StandardError
        nil
      end

      # Seek back to a previously recorded position, ignoring failures so
      # that a restore error can never mask the detection result.
      #
      # @param io [Object] Stream to reposition
      # @param position [Integer, nil] Position to restore; nil is a no-op
      # @return [void]
      def restore_position(io, position)
        io.seek(position) if position && io.respond_to?(:seek)
      rescue StandardError
        nil
      end
    end
  end
end