kreuzberg 4.0.0.pre.rc.29 → 4.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321)
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -6
  3. data/.rubocop.yaml +534 -1
  4. data/Gemfile +2 -1
  5. data/Gemfile.lock +28 -116
  6. data/README.md +269 -629
  7. data/Rakefile +0 -9
  8. data/Steepfile +4 -8
  9. data/examples/async_patterns.rb +58 -1
  10. data/ext/kreuzberg_rb/extconf.rb +5 -35
  11. data/ext/kreuzberg_rb/native/Cargo.toml +16 -55
  12. data/ext/kreuzberg_rb/native/build.rs +14 -12
  13. data/ext/kreuzberg_rb/native/include/ieeefp.h +1 -1
  14. data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +1 -1
  15. data/ext/kreuzberg_rb/native/include/strings.h +2 -2
  16. data/ext/kreuzberg_rb/native/include/unistd.h +1 -1
  17. data/ext/kreuzberg_rb/native/src/lib.rs +34 -897
  18. data/extconf.rb +6 -38
  19. data/kreuzberg.gemspec +20 -114
  20. data/lib/kreuzberg/api_proxy.rb +18 -2
  21. data/lib/kreuzberg/cache_api.rb +0 -22
  22. data/lib/kreuzberg/cli.rb +10 -2
  23. data/lib/kreuzberg/cli_proxy.rb +10 -0
  24. data/lib/kreuzberg/config.rb +22 -274
  25. data/lib/kreuzberg/errors.rb +7 -73
  26. data/lib/kreuzberg/extraction_api.rb +8 -237
  27. data/lib/kreuzberg/mcp_proxy.rb +11 -2
  28. data/lib/kreuzberg/ocr_backend_protocol.rb +73 -0
  29. data/lib/kreuzberg/post_processor_protocol.rb +71 -0
  30. data/lib/kreuzberg/result.rb +33 -151
  31. data/lib/kreuzberg/setup_lib_path.rb +2 -22
  32. data/lib/kreuzberg/validator_protocol.rb +73 -0
  33. data/lib/kreuzberg/version.rb +1 -1
  34. data/lib/kreuzberg.rb +13 -27
  35. data/pkg/kreuzberg-4.0.0.rc1.gem +0 -0
  36. data/sig/kreuzberg.rbs +12 -105
  37. data/spec/binding/cache_spec.rb +22 -22
  38. data/spec/binding/cli_proxy_spec.rb +4 -2
  39. data/spec/binding/cli_spec.rb +11 -12
  40. data/spec/binding/config_spec.rb +0 -74
  41. data/spec/binding/config_validation_spec.rb +6 -100
  42. data/spec/binding/error_handling_spec.rb +97 -283
  43. data/spec/binding/plugins/ocr_backend_spec.rb +8 -8
  44. data/spec/binding/plugins/postprocessor_spec.rb +11 -11
  45. data/spec/binding/plugins/validator_spec.rb +13 -12
  46. data/spec/examples.txt +104 -0
  47. data/spec/fixtures/config.toml +1 -0
  48. data/spec/fixtures/config.yaml +1 -0
  49. data/spec/fixtures/invalid_config.toml +1 -0
  50. data/spec/smoke/package_spec.rb +3 -2
  51. data/spec/spec_helper.rb +3 -1
  52. data/vendor/kreuzberg/Cargo.toml +67 -192
  53. data/vendor/kreuzberg/README.md +9 -97
  54. data/vendor/kreuzberg/build.rs +194 -516
  55. data/vendor/kreuzberg/src/api/handlers.rs +9 -130
  56. data/vendor/kreuzberg/src/api/mod.rs +3 -18
  57. data/vendor/kreuzberg/src/api/server.rs +71 -236
  58. data/vendor/kreuzberg/src/api/types.rs +7 -43
  59. data/vendor/kreuzberg/src/bin/profile_extract.rs +455 -0
  60. data/vendor/kreuzberg/src/cache/mod.rs +3 -27
  61. data/vendor/kreuzberg/src/chunking/mod.rs +79 -1705
  62. data/vendor/kreuzberg/src/core/batch_mode.rs +0 -60
  63. data/vendor/kreuzberg/src/core/config.rs +23 -905
  64. data/vendor/kreuzberg/src/core/extractor.rs +106 -403
  65. data/vendor/kreuzberg/src/core/io.rs +2 -4
  66. data/vendor/kreuzberg/src/core/mime.rs +12 -2
  67. data/vendor/kreuzberg/src/core/mod.rs +3 -22
  68. data/vendor/kreuzberg/src/core/pipeline.rs +78 -395
  69. data/vendor/kreuzberg/src/embeddings.rs +21 -169
  70. data/vendor/kreuzberg/src/error.rs +2 -2
  71. data/vendor/kreuzberg/src/extraction/archive.rs +31 -36
  72. data/vendor/kreuzberg/src/extraction/docx.rs +1 -365
  73. data/vendor/kreuzberg/src/extraction/email.rs +11 -12
  74. data/vendor/kreuzberg/src/extraction/excel.rs +129 -138
  75. data/vendor/kreuzberg/src/extraction/html.rs +170 -1447
  76. data/vendor/kreuzberg/src/extraction/image.rs +14 -138
  77. data/vendor/kreuzberg/src/extraction/libreoffice.rs +3 -13
  78. data/vendor/kreuzberg/src/extraction/mod.rs +5 -21
  79. data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +0 -2
  80. data/vendor/kreuzberg/src/extraction/pandoc/batch.rs +275 -0
  81. data/vendor/kreuzberg/src/extraction/pandoc/mime_types.rs +178 -0
  82. data/vendor/kreuzberg/src/extraction/pandoc/mod.rs +491 -0
  83. data/vendor/kreuzberg/src/extraction/pandoc/server.rs +496 -0
  84. data/vendor/kreuzberg/src/extraction/pandoc/subprocess.rs +1188 -0
  85. data/vendor/kreuzberg/src/extraction/pandoc/version.rs +162 -0
  86. data/vendor/kreuzberg/src/extraction/pptx.rs +94 -196
  87. data/vendor/kreuzberg/src/extraction/structured.rs +4 -5
  88. data/vendor/kreuzberg/src/extraction/table.rs +1 -2
  89. data/vendor/kreuzberg/src/extraction/text.rs +10 -18
  90. data/vendor/kreuzberg/src/extractors/archive.rs +0 -22
  91. data/vendor/kreuzberg/src/extractors/docx.rs +148 -69
  92. data/vendor/kreuzberg/src/extractors/email.rs +9 -37
  93. data/vendor/kreuzberg/src/extractors/excel.rs +40 -81
  94. data/vendor/kreuzberg/src/extractors/html.rs +173 -182
  95. data/vendor/kreuzberg/src/extractors/image.rs +8 -32
  96. data/vendor/kreuzberg/src/extractors/mod.rs +10 -171
  97. data/vendor/kreuzberg/src/extractors/pandoc.rs +201 -0
  98. data/vendor/kreuzberg/src/extractors/pdf.rs +64 -329
  99. data/vendor/kreuzberg/src/extractors/pptx.rs +34 -79
  100. data/vendor/kreuzberg/src/extractors/structured.rs +0 -16
  101. data/vendor/kreuzberg/src/extractors/text.rs +7 -30
  102. data/vendor/kreuzberg/src/extractors/xml.rs +8 -27
  103. data/vendor/kreuzberg/src/keywords/processor.rs +1 -9
  104. data/vendor/kreuzberg/src/keywords/rake.rs +1 -0
  105. data/vendor/kreuzberg/src/language_detection/mod.rs +51 -94
  106. data/vendor/kreuzberg/src/lib.rs +5 -17
  107. data/vendor/kreuzberg/src/mcp/mod.rs +1 -4
  108. data/vendor/kreuzberg/src/mcp/server.rs +21 -145
  109. data/vendor/kreuzberg/src/ocr/mod.rs +0 -2
  110. data/vendor/kreuzberg/src/ocr/processor.rs +8 -19
  111. data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +0 -2
  112. data/vendor/kreuzberg/src/pdf/error.rs +1 -93
  113. data/vendor/kreuzberg/src/pdf/metadata.rs +100 -263
  114. data/vendor/kreuzberg/src/pdf/mod.rs +2 -33
  115. data/vendor/kreuzberg/src/pdf/rendering.rs +12 -12
  116. data/vendor/kreuzberg/src/pdf/table.rs +64 -61
  117. data/vendor/kreuzberg/src/pdf/text.rs +24 -416
  118. data/vendor/kreuzberg/src/plugins/extractor.rs +8 -40
  119. data/vendor/kreuzberg/src/plugins/mod.rs +0 -3
  120. data/vendor/kreuzberg/src/plugins/ocr.rs +14 -22
  121. data/vendor/kreuzberg/src/plugins/processor.rs +1 -10
  122. data/vendor/kreuzberg/src/plugins/registry.rs +0 -15
  123. data/vendor/kreuzberg/src/plugins/validator.rs +8 -20
  124. data/vendor/kreuzberg/src/stopwords/mod.rs +2 -2
  125. data/vendor/kreuzberg/src/text/mod.rs +0 -8
  126. data/vendor/kreuzberg/src/text/quality.rs +15 -28
  127. data/vendor/kreuzberg/src/text/string_utils.rs +10 -22
  128. data/vendor/kreuzberg/src/text/token_reduction/core.rs +50 -86
  129. data/vendor/kreuzberg/src/text/token_reduction/filters.rs +16 -37
  130. data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +1 -2
  131. data/vendor/kreuzberg/src/types.rs +67 -907
  132. data/vendor/kreuzberg/src/utils/mod.rs +0 -14
  133. data/vendor/kreuzberg/src/utils/quality.rs +3 -12
  134. data/vendor/kreuzberg/tests/api_tests.rs +0 -506
  135. data/vendor/kreuzberg/tests/archive_integration.rs +0 -2
  136. data/vendor/kreuzberg/tests/batch_orchestration.rs +12 -57
  137. data/vendor/kreuzberg/tests/batch_processing.rs +8 -32
  138. data/vendor/kreuzberg/tests/chunking_offset_demo.rs +92 -0
  139. data/vendor/kreuzberg/tests/concurrency_stress.rs +8 -40
  140. data/vendor/kreuzberg/tests/config_features.rs +1 -33
  141. data/vendor/kreuzberg/tests/config_loading_tests.rs +39 -16
  142. data/vendor/kreuzberg/tests/core_integration.rs +9 -35
  143. data/vendor/kreuzberg/tests/csv_integration.rs +81 -71
  144. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +25 -23
  145. data/vendor/kreuzberg/tests/email_integration.rs +1 -3
  146. data/vendor/kreuzberg/tests/error_handling.rs +34 -43
  147. data/vendor/kreuzberg/tests/format_integration.rs +1 -7
  148. data/vendor/kreuzberg/tests/helpers/mod.rs +0 -60
  149. data/vendor/kreuzberg/tests/image_integration.rs +0 -2
  150. data/vendor/kreuzberg/tests/mime_detection.rs +16 -17
  151. data/vendor/kreuzberg/tests/ocr_configuration.rs +0 -4
  152. data/vendor/kreuzberg/tests/ocr_errors.rs +0 -22
  153. data/vendor/kreuzberg/tests/ocr_quality.rs +0 -2
  154. data/vendor/kreuzberg/tests/pandoc_integration.rs +503 -0
  155. data/vendor/kreuzberg/tests/pdf_integration.rs +0 -2
  156. data/vendor/kreuzberg/tests/pipeline_integration.rs +2 -36
  157. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +0 -5
  158. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +1 -17
  159. data/vendor/kreuzberg/tests/plugin_system.rs +0 -6
  160. data/vendor/kreuzberg/tests/registry_integration_tests.rs +22 -2
  161. data/vendor/kreuzberg/tests/security_validation.rs +1 -13
  162. data/vendor/kreuzberg/tests/test_fastembed.rs +23 -45
  163. metadata +25 -171
  164. data/.rubocop.yml +0 -543
  165. data/ext/kreuzberg_rb/native/.cargo/config.toml +0 -23
  166. data/ext/kreuzberg_rb/native/Cargo.lock +0 -7619
  167. data/lib/kreuzberg/error_context.rb +0 -136
  168. data/lib/kreuzberg/types.rb +0 -170
  169. data/lib/libpdfium.so +0 -0
  170. data/spec/binding/async_operations_spec.rb +0 -473
  171. data/spec/binding/batch_operations_spec.rb +0 -595
  172. data/spec/binding/batch_spec.rb +0 -359
  173. data/spec/binding/config_result_spec.rb +0 -377
  174. data/spec/binding/embeddings_spec.rb +0 -816
  175. data/spec/binding/error_recovery_spec.rb +0 -488
  176. data/spec/binding/font_config_spec.rb +0 -220
  177. data/spec/binding/images_spec.rb +0 -738
  178. data/spec/binding/keywords_extraction_spec.rb +0 -600
  179. data/spec/binding/metadata_types_spec.rb +0 -1228
  180. data/spec/binding/pages_extraction_spec.rb +0 -471
  181. data/spec/binding/tables_spec.rb +0 -641
  182. data/spec/unit/config/chunking_config_spec.rb +0 -213
  183. data/spec/unit/config/embedding_config_spec.rb +0 -343
  184. data/spec/unit/config/extraction_config_spec.rb +0 -438
  185. data/spec/unit/config/font_config_spec.rb +0 -285
  186. data/spec/unit/config/hierarchy_config_spec.rb +0 -314
  187. data/spec/unit/config/image_extraction_config_spec.rb +0 -209
  188. data/spec/unit/config/image_preprocessing_config_spec.rb +0 -249
  189. data/spec/unit/config/keyword_config_spec.rb +0 -229
  190. data/spec/unit/config/language_detection_config_spec.rb +0 -258
  191. data/spec/unit/config/ocr_config_spec.rb +0 -171
  192. data/spec/unit/config/page_config_spec.rb +0 -221
  193. data/spec/unit/config/pdf_config_spec.rb +0 -267
  194. data/spec/unit/config/postprocessor_config_spec.rb +0 -290
  195. data/spec/unit/config/tesseract_config_spec.rb +0 -181
  196. data/spec/unit/config/token_reduction_config_spec.rb +0 -251
  197. data/test/metadata_types_test.rb +0 -959
  198. data/vendor/Cargo.toml +0 -61
  199. data/vendor/kreuzberg/examples/bench_fixes.rs +0 -71
  200. data/vendor/kreuzberg/examples/test_pdfium_fork.rs +0 -62
  201. data/vendor/kreuzberg/src/chunking/processor.rs +0 -219
  202. data/vendor/kreuzberg/src/core/batch_optimizations.rs +0 -385
  203. data/vendor/kreuzberg/src/core/config_validation.rs +0 -949
  204. data/vendor/kreuzberg/src/core/formats.rs +0 -235
  205. data/vendor/kreuzberg/src/core/server_config.rs +0 -1220
  206. data/vendor/kreuzberg/src/extraction/capacity.rs +0 -263
  207. data/vendor/kreuzberg/src/extraction/markdown.rs +0 -216
  208. data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -284
  209. data/vendor/kreuzberg/src/extractors/bibtex.rs +0 -470
  210. data/vendor/kreuzberg/src/extractors/docbook.rs +0 -504
  211. data/vendor/kreuzberg/src/extractors/epub.rs +0 -696
  212. data/vendor/kreuzberg/src/extractors/fictionbook.rs +0 -492
  213. data/vendor/kreuzberg/src/extractors/jats.rs +0 -1054
  214. data/vendor/kreuzberg/src/extractors/jupyter.rs +0 -368
  215. data/vendor/kreuzberg/src/extractors/latex.rs +0 -653
  216. data/vendor/kreuzberg/src/extractors/markdown.rs +0 -701
  217. data/vendor/kreuzberg/src/extractors/odt.rs +0 -628
  218. data/vendor/kreuzberg/src/extractors/opml.rs +0 -635
  219. data/vendor/kreuzberg/src/extractors/orgmode.rs +0 -529
  220. data/vendor/kreuzberg/src/extractors/rst.rs +0 -577
  221. data/vendor/kreuzberg/src/extractors/rtf.rs +0 -809
  222. data/vendor/kreuzberg/src/extractors/security.rs +0 -484
  223. data/vendor/kreuzberg/src/extractors/security_tests.rs +0 -367
  224. data/vendor/kreuzberg/src/extractors/typst.rs +0 -651
  225. data/vendor/kreuzberg/src/language_detection/processor.rs +0 -218
  226. data/vendor/kreuzberg/src/ocr/language_registry.rs +0 -520
  227. data/vendor/kreuzberg/src/panic_context.rs +0 -154
  228. data/vendor/kreuzberg/src/pdf/bindings.rs +0 -306
  229. data/vendor/kreuzberg/src/pdf/bundled.rs +0 -408
  230. data/vendor/kreuzberg/src/pdf/fonts.rs +0 -358
  231. data/vendor/kreuzberg/src/pdf/hierarchy.rs +0 -903
  232. data/vendor/kreuzberg/src/text/quality_processor.rs +0 -231
  233. data/vendor/kreuzberg/src/text/utf8_validation.rs +0 -193
  234. data/vendor/kreuzberg/src/utils/pool.rs +0 -503
  235. data/vendor/kreuzberg/src/utils/pool_sizing.rs +0 -364
  236. data/vendor/kreuzberg/src/utils/string_pool.rs +0 -761
  237. data/vendor/kreuzberg/tests/api_embed.rs +0 -360
  238. data/vendor/kreuzberg/tests/api_extract_multipart.rs +0 -52
  239. data/vendor/kreuzberg/tests/api_large_pdf_extraction.rs +0 -471
  240. data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +0 -289
  241. data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +0 -154
  242. data/vendor/kreuzberg/tests/bibtex_parity_test.rs +0 -421
  243. data/vendor/kreuzberg/tests/config_integration_test.rs +0 -753
  244. data/vendor/kreuzberg/tests/data/hierarchy_ground_truth.json +0 -294
  245. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +0 -500
  246. data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +0 -370
  247. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +0 -275
  248. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +0 -228
  249. data/vendor/kreuzberg/tests/html_table_test.rs +0 -551
  250. data/vendor/kreuzberg/tests/instrumentation_test.rs +0 -139
  251. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +0 -639
  252. data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +0 -704
  253. data/vendor/kreuzberg/tests/latex_extractor_tests.rs +0 -496
  254. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +0 -490
  255. data/vendor/kreuzberg/tests/ocr_language_registry.rs +0 -191
  256. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -674
  257. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +0 -616
  258. data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +0 -822
  259. data/vendor/kreuzberg/tests/page_markers.rs +0 -297
  260. data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +0 -301
  261. data/vendor/kreuzberg/tests/pdf_hierarchy_quality.rs +0 -589
  262. data/vendor/kreuzberg/tests/pdf_ocr_triggering.rs +0 -301
  263. data/vendor/kreuzberg/tests/pdf_text_merging.rs +0 -475
  264. data/vendor/kreuzberg/tests/pdfium_linking.rs +0 -340
  265. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +0 -694
  266. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -775
  267. data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +0 -1260
  268. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +0 -648
  269. data/vendor/kreuzberg-ffi/Cargo.toml +0 -67
  270. data/vendor/kreuzberg-ffi/README.md +0 -851
  271. data/vendor/kreuzberg-ffi/benches/result_view_benchmark.rs +0 -227
  272. data/vendor/kreuzberg-ffi/build.rs +0 -168
  273. data/vendor/kreuzberg-ffi/cbindgen.toml +0 -37
  274. data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +0 -12
  275. data/vendor/kreuzberg-ffi/kreuzberg.h +0 -3012
  276. data/vendor/kreuzberg-ffi/src/batch_streaming.rs +0 -588
  277. data/vendor/kreuzberg-ffi/src/config.rs +0 -1341
  278. data/vendor/kreuzberg-ffi/src/error.rs +0 -901
  279. data/vendor/kreuzberg-ffi/src/extraction.rs +0 -555
  280. data/vendor/kreuzberg-ffi/src/helpers.rs +0 -879
  281. data/vendor/kreuzberg-ffi/src/lib.rs +0 -977
  282. data/vendor/kreuzberg-ffi/src/memory.rs +0 -493
  283. data/vendor/kreuzberg-ffi/src/mime.rs +0 -329
  284. data/vendor/kreuzberg-ffi/src/panic_shield.rs +0 -265
  285. data/vendor/kreuzberg-ffi/src/plugins/document_extractor.rs +0 -442
  286. data/vendor/kreuzberg-ffi/src/plugins/mod.rs +0 -14
  287. data/vendor/kreuzberg-ffi/src/plugins/ocr_backend.rs +0 -628
  288. data/vendor/kreuzberg-ffi/src/plugins/post_processor.rs +0 -438
  289. data/vendor/kreuzberg-ffi/src/plugins/validator.rs +0 -329
  290. data/vendor/kreuzberg-ffi/src/result.rs +0 -510
  291. data/vendor/kreuzberg-ffi/src/result_pool.rs +0 -639
  292. data/vendor/kreuzberg-ffi/src/result_view.rs +0 -773
  293. data/vendor/kreuzberg-ffi/src/string_intern.rs +0 -568
  294. data/vendor/kreuzberg-ffi/src/types.rs +0 -363
  295. data/vendor/kreuzberg-ffi/src/util.rs +0 -210
  296. data/vendor/kreuzberg-ffi/src/validation.rs +0 -848
  297. data/vendor/kreuzberg-ffi/tests.disabled/README.md +0 -48
  298. data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +0 -299
  299. data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +0 -346
  300. data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +0 -232
  301. data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +0 -470
  302. data/vendor/kreuzberg-tesseract/.commitlintrc.json +0 -13
  303. data/vendor/kreuzberg-tesseract/.crate-ignore +0 -2
  304. data/vendor/kreuzberg-tesseract/Cargo.lock +0 -2933
  305. data/vendor/kreuzberg-tesseract/Cargo.toml +0 -57
  306. data/vendor/kreuzberg-tesseract/LICENSE +0 -22
  307. data/vendor/kreuzberg-tesseract/README.md +0 -399
  308. data/vendor/kreuzberg-tesseract/build.rs +0 -1127
  309. data/vendor/kreuzberg-tesseract/patches/README.md +0 -71
  310. data/vendor/kreuzberg-tesseract/patches/tesseract.diff +0 -199
  311. data/vendor/kreuzberg-tesseract/src/api.rs +0 -1371
  312. data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +0 -77
  313. data/vendor/kreuzberg-tesseract/src/enums.rs +0 -297
  314. data/vendor/kreuzberg-tesseract/src/error.rs +0 -81
  315. data/vendor/kreuzberg-tesseract/src/lib.rs +0 -145
  316. data/vendor/kreuzberg-tesseract/src/monitor.rs +0 -57
  317. data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +0 -197
  318. data/vendor/kreuzberg-tesseract/src/page_iterator.rs +0 -253
  319. data/vendor/kreuzberg-tesseract/src/result_iterator.rs +0 -286
  320. data/vendor/kreuzberg-tesseract/src/result_renderer.rs +0 -183
  321. data/vendor/kreuzberg-tesseract/tests/integration_test.rs +0 -211
data/sig/kreuzberg.rbs CHANGED
@@ -3,16 +3,6 @@
3
3
  module Kreuzberg
4
4
  VERSION: String
5
5
 
6
- # Error code constants
7
- ERROR_CODE_SUCCESS: Integer
8
- ERROR_CODE_GENERIC: Integer
9
- ERROR_CODE_PANIC: Integer
10
- ERROR_CODE_INVALID_ARGUMENT: Integer
11
- ERROR_CODE_IO: Integer
12
- ERROR_CODE_PARSING: Integer
13
- ERROR_CODE_OCR: Integer
14
- ERROR_CODE_MISSING_DEPENDENCY: Integer
15
-
16
6
  # Config namespace (defined in lib/kreuzberg/config.rb)
17
7
  module Config
18
8
  class OCR
@@ -74,21 +64,12 @@ module Kreuzberg
74
64
  def to_h: () -> Hash[Symbol, untyped]
75
65
  end
76
66
 
77
- class FontConfig
78
- attr_accessor enabled: bool
79
- attr_accessor custom_font_dirs: Array[String]?
80
-
81
- def initialize: (?enabled: bool, ?custom_font_dirs: Array[String]?) -> void
82
- def to_h: () -> Hash[Symbol, untyped]
83
- end
84
-
85
67
  class PDF
86
68
  attr_reader extract_images: bool
87
69
  attr_reader passwords: Array[String]?
88
70
  attr_reader extract_metadata: bool
89
- attr_reader font_config: FontConfig?
90
71
 
91
- def initialize: (?extract_images: bool, ?passwords: (Array[String] | String)?, ?extract_metadata: bool, ?font_config: (FontConfig | Hash[Symbol, untyped])?) -> void
72
+ def initialize: (?extract_images: bool, ?passwords: (Array[String] | String)?, ?extract_metadata: bool) -> void
92
73
  def to_h: () -> Hash[Symbol, untyped]
93
74
  end
94
75
 
@@ -177,15 +158,6 @@ module Kreuzberg
177
158
  def to_h: () -> Hash[Symbol, untyped]
178
159
  end
179
160
 
180
- class PageConfig
181
- attr_reader extract_pages: bool
182
- attr_reader insert_page_markers: bool
183
- attr_reader marker_format: String
184
-
185
- def initialize: (?extract_pages: bool, ?insert_page_markers: bool, ?marker_format: String) -> void
186
- def to_h: () -> Hash[Symbol, untyped]
187
- end
188
-
189
161
  class Extraction
190
162
  attr_reader use_cache: bool
191
163
  attr_reader enable_quality_processing: bool
@@ -200,7 +172,6 @@ module Kreuzberg
200
172
  attr_reader token_reduction: TokenReduction?
201
173
  attr_reader keywords: Keywords?
202
174
  attr_reader html_options: HtmlOptions?
203
- attr_reader pages: PageConfig?
204
175
  attr_reader max_concurrent_extractions: Integer?
205
176
 
206
177
  def self.from_file: (String path) -> Extraction
@@ -218,7 +189,6 @@ module Kreuzberg
218
189
  ?token_reduction: (TokenReduction | Hash[Symbol, untyped])?,
219
190
  ?keywords: (Keywords | Hash[Symbol, untyped])?,
220
191
  ?html_options: (HtmlOptions | Hash[Symbol, untyped])?,
221
- ?pages: (PageConfig | Hash[Symbol, untyped])?,
222
192
  ?max_concurrent_extractions: Integer?
223
193
  ) -> void
224
194
  def to_h: () -> Hash[Symbol, untyped]
@@ -228,20 +198,13 @@ module Kreuzberg
228
198
  def normalize_config: [T] (T | Hash[Symbol, untyped] | nil value, Class klass) -> T?
229
199
  end
230
200
 
201
+ # Backwards compatibility alias
202
+ Ocr: singleton(OCR)
231
203
  end
232
204
 
233
205
  # Alias for Config::Extraction (for API consistency with other language bindings)
234
206
  ExtractionConfig: singleton(Config::Extraction)
235
207
 
236
- # Alias for Config::PageConfig (for API consistency with other language bindings)
237
- PageConfig: singleton(Config::PageConfig)
238
-
239
- # Keyword algorithm constants
240
- module KeywordAlgorithm
241
- YAKE: Symbol
242
- RAKE: Symbol
243
- end
244
-
245
208
  # Extraction result type
246
209
  type extraction_result_hash = {
247
210
  content: String,
@@ -261,13 +224,11 @@ module Kreuzberg
261
224
 
262
225
  type chunk_hash = {
263
226
  content: String,
264
- byte_start: Integer,
265
- byte_end: Integer,
227
+ char_start: Integer,
228
+ char_end: Integer,
266
229
  token_count: Integer?,
267
230
  chunk_index: Integer?,
268
231
  total_chunks: Integer?,
269
- first_page: Integer?,
270
- last_page: Integer?,
271
232
  embedding: Array[Float]?
272
233
  }
273
234
 
@@ -307,24 +268,20 @@ module Kreuzberg
307
268
  # Text chunk
308
269
  class Chunk
309
270
  attr_reader content: String
310
- attr_reader byte_start: Integer
311
- attr_reader byte_end: Integer
271
+ attr_reader char_start: Integer
272
+ attr_reader char_end: Integer
312
273
  attr_reader token_count: Integer?
313
274
  attr_reader chunk_index: Integer?
314
275
  attr_reader total_chunks: Integer?
315
- attr_reader first_page: Integer?
316
- attr_reader last_page: Integer?
317
276
  attr_reader embedding: Array[Float]?
318
277
 
319
278
  def initialize: (
320
279
  content: String,
321
- byte_start: Integer,
322
- byte_end: Integer,
280
+ char_start: Integer,
281
+ char_end: Integer,
323
282
  token_count: Integer?,
324
283
  chunk_index: Integer?,
325
284
  total_chunks: Integer?,
326
- first_page: Integer?,
327
- last_page: Integer?,
328
285
  embedding: Array[Float]?
329
286
  ) -> void
330
287
  def to_h: () -> chunk_hash
@@ -434,14 +391,6 @@ module Kreuzberg
434
391
  # Config loading (native method)
435
392
  def self._config_from_file_native: (String path) -> Hash[Symbol, untyped]
436
393
 
437
- # Error introspection (native methods)
438
- def self._last_error_code_native: () -> Integer
439
- def self._last_panic_context_json_native: () -> String?
440
- def self._get_error_details_native: () -> Hash[Symbol, untyped]
441
- def self._classify_error_native: (String message) -> Integer
442
- def self._error_code_name_native: (Integer code) -> String
443
- def self._error_code_description_native: (Integer code) -> String
444
-
445
394
  # Plugin registration
446
395
  def self.register_post_processor: (String name, _PostProcessor processor, ?stage: Symbol?) -> void
447
396
  def self.unregister_post_processor: (String name) -> void
@@ -464,67 +413,25 @@ module Kreuzberg
464
413
  def extract_text: (String file_path_or_bytes, Hash[Symbol, untyped] config) -> String
465
414
  end
466
415
 
467
- module ErrorContext
468
- def self.last_error_code: () -> Integer
469
- def self.last_panic_context: () -> Errors::PanicContext?
470
- def self.last_panic_context_json: () -> String?
471
- def self.error_details: () -> Hash[Symbol, untyped]
472
- def self.classify_error: (String message) -> Integer
473
- def self.error_code_name: (Integer code) -> String
474
- def self.error_code_description: (Integer code) -> String
475
- end
476
-
477
416
  module Errors
478
- # Panic context information from FFI error introspection
479
- class PanicContext
480
- attr_reader file: String
481
- attr_reader line: Integer
482
- attr_reader function: String
483
- attr_reader message: String
484
- attr_reader timestamp_secs: Integer
485
-
486
- def initialize: (
487
- file: String,
488
- line: Integer,
489
- function: String,
490
- message: String,
491
- timestamp_secs: Integer
492
- ) -> void
493
- def to_s: () -> String
494
- def to_h: () -> Hash[Symbol, String | Integer]
495
- def self.from_json: (String) -> PanicContext?
496
-
497
- private
498
-
499
- def self.with_defaults: (Hash[Symbol, untyped] sliced) -> {file: String, line: Integer, function: String, message: String, timestamp_secs: Integer}
500
- end
501
-
502
417
  class Error < StandardError
503
- attr_reader panic_context: PanicContext?
504
- attr_reader error_code: Integer?
505
-
506
- def initialize: (String message, ?panic_context: PanicContext?, ?error_code: Integer?) -> void
507
418
  end
508
419
 
509
420
  class ValidationError < Error
510
421
  end
511
422
 
512
423
  class ParsingError < Error
513
- attr_reader context: Hash[untyped, untyped]?
514
-
515
- def initialize: (String message, ?context: Hash[untyped, untyped]?, ?panic_context: PanicContext?, ?error_code: Integer?) -> void
424
+ def initialize: (String message, ?context: Hash[untyped, untyped]?) -> void
516
425
  end
517
426
 
518
427
  class OCRError < Error
519
- attr_reader context: Hash[untyped, untyped]?
520
-
521
- def initialize: (String message, ?context: Hash[untyped, untyped]?, ?panic_context: PanicContext?, ?error_code: Integer?) -> void
428
+ def initialize: (String message, ?context: Hash[untyped, untyped]?) -> void
522
429
  end
523
430
 
524
431
  class MissingDependencyError < Error
525
432
  attr_reader dependency: String?
526
433
 
527
- def initialize: (String message, ?dependency: String?, ?panic_context: PanicContext?, ?error_code: Integer?) -> void
434
+ def initialize: (String message, ?dependency: String?) -> void
528
435
  end
529
436
 
530
437
  class IOError < Error
@@ -19,8 +19,8 @@ RSpec.describe 'Cache Management' do
19
19
 
20
20
  describe 'clear_cache' do
21
21
  it 'removes all cached results' do
22
- Kreuzberg.extract_file_sync(path: test_pdf)
23
- Kreuzberg.extract_file_sync(path: test_text)
22
+ Kreuzberg.extract_file_sync(test_pdf)
23
+ Kreuzberg.extract_file_sync(test_text)
24
24
 
25
25
  stats_before = Kreuzberg.cache_stats
26
26
  expect(stats_before['total_entries']).to be_positive
@@ -47,10 +47,10 @@ RSpec.describe 'Cache Management' do
47
47
  end
48
48
 
49
49
  it 'does not affect future extractions' do
50
- Kreuzberg.extract_file_sync(path: test_pdf)
50
+ Kreuzberg.extract_file_sync(test_pdf)
51
51
  Kreuzberg.clear_cache
52
52
 
53
- result = Kreuzberg.extract_file_sync(path: test_pdf)
53
+ result = Kreuzberg.extract_file_sync(test_pdf)
54
54
 
55
55
  expect(result).to be_a(Kreuzberg::Result)
56
56
  expect(result.content).not_to be_empty
@@ -77,7 +77,7 @@ RSpec.describe 'Cache Management' do
77
77
  it 'shows entries after extractions' do
78
78
  Kreuzberg.clear_cache
79
79
 
80
- Kreuzberg.extract_file_sync(path: test_pdf)
80
+ Kreuzberg.extract_file_sync(test_pdf)
81
81
  stats = Kreuzberg.cache_stats
82
82
 
83
83
  expect(stats['total_entries']).to be_positive
@@ -86,7 +86,7 @@ RSpec.describe 'Cache Management' do
86
86
  it 'shows total size in bytes' do
87
87
  Kreuzberg.clear_cache
88
88
 
89
- Kreuzberg.extract_file_sync(path: test_pdf)
89
+ Kreuzberg.extract_file_sync(test_pdf)
90
90
  stats = Kreuzberg.cache_stats
91
91
 
92
92
  expect(stats['total_size_bytes']).to be_positive
@@ -95,10 +95,10 @@ RSpec.describe 'Cache Management' do
95
95
  it 'increases stats with multiple extractions' do
96
96
  Kreuzberg.clear_cache
97
97
 
98
- Kreuzberg.extract_file_sync(path: test_pdf)
98
+ Kreuzberg.extract_file_sync(test_pdf)
99
99
  stats_after_one = Kreuzberg.cache_stats
100
100
 
101
- Kreuzberg.extract_file_sync(path: test_text)
101
+ Kreuzberg.extract_file_sync(test_text)
102
102
  stats_after_two = Kreuzberg.cache_stats
103
103
 
104
104
  expect(stats_after_two['total_entries']).to be >= stats_after_one['total_entries']
@@ -111,11 +111,11 @@ RSpec.describe 'Cache Management' do
111
111
  stats_initial = Kreuzberg.cache_stats
112
112
  expect(stats_initial['total_entries']).to eq(0)
113
113
 
114
- result1 = Kreuzberg.extract_file_sync(path: test_pdf)
114
+ result1 = Kreuzberg.extract_file_sync(test_pdf)
115
115
  stats_after_first = Kreuzberg.cache_stats
116
116
  expect(stats_after_first['total_entries']).to be_positive
117
117
 
118
- result2 = Kreuzberg.extract_file_sync(path: test_pdf)
118
+ result2 = Kreuzberg.extract_file_sync(test_pdf)
119
119
  stats_after_second = Kreuzberg.cache_stats
120
120
 
121
121
  expect(result1.content).to eq(result2.content)
@@ -125,10 +125,10 @@ RSpec.describe 'Cache Management' do
125
125
  it 'tracks different files separately' do
126
126
  Kreuzberg.clear_cache
127
127
 
128
- Kreuzberg.extract_file_sync(path: test_pdf)
128
+ Kreuzberg.extract_file_sync(test_pdf)
129
129
  stats_after_pdf = Kreuzberg.cache_stats
130
130
 
131
- Kreuzberg.extract_file_sync(path: test_text)
131
+ Kreuzberg.extract_file_sync(test_text)
132
132
  stats_after_text = Kreuzberg.cache_stats
133
133
 
134
134
  expect(stats_after_text['total_entries']).to be >= stats_after_pdf['total_entries']
@@ -138,11 +138,11 @@ RSpec.describe 'Cache Management' do
138
138
  Kreuzberg.clear_cache
139
139
 
140
140
  Time.now
141
- result1 = Kreuzberg.extract_file_sync(path: test_pdf)
141
+ result1 = Kreuzberg.extract_file_sync(test_pdf)
142
142
  Time.now
143
143
 
144
144
  Time.now
145
- result2 = Kreuzberg.extract_file_sync(path: test_pdf)
145
+ result2 = Kreuzberg.extract_file_sync(test_pdf)
146
146
  Time.now
147
147
 
148
148
  expect(result1.content).to eq(result2.content)
@@ -150,11 +150,11 @@ RSpec.describe 'Cache Management' do
150
150
  end
151
151
 
152
152
  it 'clears cache between extractions when requested' do
153
- result1 = Kreuzberg.extract_file_sync(path: test_pdf)
153
+ result1 = Kreuzberg.extract_file_sync(test_pdf)
154
154
 
155
155
  Kreuzberg.clear_cache
156
156
 
157
- result2 = Kreuzberg.extract_file_sync(path: test_pdf)
157
+ result2 = Kreuzberg.extract_file_sync(test_pdf)
158
158
 
159
159
  expect(result1.content).to eq(result2.content)
160
160
  end
@@ -167,10 +167,10 @@ RSpec.describe 'Cache Management' do
167
167
  config1 = Kreuzberg::Config::Extraction.new(use_cache: true)
168
168
  config2 = Kreuzberg::Config::Extraction.new(use_cache: false)
169
169
 
170
- Kreuzberg.extract_file_sync(path: test_pdf, config: config1)
170
+ Kreuzberg.extract_file_sync(test_pdf, config: config1)
171
171
  stats_after_first = Kreuzberg.cache_stats
172
172
 
173
- Kreuzberg.extract_file_sync(path: test_pdf, config: config2)
173
+ Kreuzberg.extract_file_sync(test_pdf, config: config2)
174
174
  stats_after_second = Kreuzberg.cache_stats
175
175
 
176
176
  expect(stats_after_second['total_entries']).to eq(stats_after_first['total_entries'])
@@ -179,8 +179,8 @@ RSpec.describe 'Cache Management' do
179
179
 
180
180
  describe 'cache stats consistency' do
181
181
  it 'stats remain consistent after clear' do
182
- Kreuzberg.extract_file_sync(path: test_pdf)
183
- Kreuzberg.extract_file_sync(path: test_text)
182
+ Kreuzberg.extract_file_sync(test_pdf)
183
+ Kreuzberg.extract_file_sync(test_text)
184
184
 
185
185
  Kreuzberg.clear_cache
186
186
  stats = Kreuzberg.cache_stats
@@ -192,12 +192,12 @@ RSpec.describe 'Cache Management' do
192
192
  it 'stats update correctly after new extractions' do
193
193
  Kreuzberg.clear_cache
194
194
 
195
- Kreuzberg.extract_file_sync(path: test_pdf)
195
+ Kreuzberg.extract_file_sync(test_pdf)
196
196
  Kreuzberg.cache_stats
197
197
 
198
198
  Kreuzberg.clear_cache
199
199
 
200
- Kreuzberg.extract_file_sync(path: test_text)
200
+ Kreuzberg.extract_file_sync(test_text)
201
201
  stats2 = Kreuzberg.cache_stats
202
202
 
203
203
  expect(stats2['total_entries']).to be_positive
@@ -3,7 +3,8 @@
3
3
  RSpec.describe Kreuzberg::CLIProxy do
4
4
  describe '.find_cli_binary' do
5
5
  context 'when binary exists' do
6
- it 'finds the binary in search paths' do
6
+ it 'finds the binary in search paths', :skip do
7
+ # Skip in CI/test environments where binary might not be built
7
8
  binary = described_class.find_cli_binary
8
9
  expect(binary).to be_a(Pathname)
9
10
  expect(binary.file?).to be true
@@ -24,8 +25,9 @@ RSpec.describe Kreuzberg::CLIProxy do
24
25
  end
25
26
 
26
27
  describe '.call' do
27
- context 'when binary is available' do
28
+ context 'when binary is available', :skip do
28
29
  it 'executes CLI command successfully' do
30
+ # Skip in environments without built binary
29
31
  output = described_class.call(['--version'])
30
32
  expect(output).to be_a(String)
31
33
  expect(output).not_to be_empty
@@ -1,35 +1,34 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  RSpec.describe Kreuzberg::CLI do
4
- describe '.extract' do
4
+ describe '.extract', :skip do
5
5
  it 'extracts content from a file' do
6
- path = test_document_path('documents/simple.odt')
6
+ # Skip in environments without CLI binary
7
+ path = create_test_file('CLI test content')
7
8
  output = described_class.extract(path)
8
9
 
9
10
  expect(output).to be_a(String)
10
- expect(output).not_to be_empty
11
+ expect(output).to include('CLI test content')
11
12
  end
12
13
 
13
14
  it 'accepts output format option' do
14
- path = test_document_path('documents/simple.odt')
15
+ path = create_test_file('JSON output test')
15
16
  output = described_class.extract(path, output: 'json')
16
17
 
17
18
  expect(output).to be_a(String)
18
- expect(output).not_to be_empty
19
19
  end
20
20
 
21
21
  it 'accepts OCR option' do
22
- path = test_document_path('pdfs/100_g_networking_technology_overview_slides_toronto_august_2016.pdf')
23
- output = described_class.extract(path, ocr: false)
22
+ path = create_test_file('OCR test')
23
+ output = described_class.extract(path, ocr: true)
24
24
 
25
25
  expect(output).to be_a(String)
26
- expect(output).not_to be_empty
27
26
  end
28
27
  end
29
28
 
30
- describe '.detect' do
29
+ describe '.detect', :skip do
31
30
  it 'detects MIME type' do
32
- path = test_document_path('documents/simple.odt')
31
+ path = create_test_file('MIME detection test')
33
32
  mime_type = described_class.detect(path)
34
33
 
35
34
  expect(mime_type).to be_a(String)
@@ -37,7 +36,7 @@ RSpec.describe Kreuzberg::CLI do
37
36
  end
38
37
  end
39
38
 
40
- describe '.version' do
39
+ describe '.version', :skip do
41
40
  it 'returns version string' do
42
41
  version = described_class.version
43
42
  expect(version).to be_a(String)
@@ -45,7 +44,7 @@ RSpec.describe Kreuzberg::CLI do
45
44
  end
46
45
  end
47
46
 
48
- describe '.help' do
47
+ describe '.help', :skip do
49
48
  it 'returns help text' do
50
49
  help_text = described_class.help
51
50
  expect(help_text).to be_a(String)
@@ -86,47 +86,6 @@ RSpec.describe Kreuzberg::Config do
86
86
  end
87
87
  end
88
88
 
89
- describe Kreuzberg::Config::FontConfig do
90
- it 'creates with default values' do
91
- font_config = described_class.new
92
-
93
- expect(font_config.enabled).to be true
94
- expect(font_config.custom_font_dirs).to be_nil
95
- end
96
-
97
- it 'creates with custom values' do
98
- dirs = ['/usr/share/fonts', '/home/user/.fonts']
99
- font_config = described_class.new(
100
- enabled: false,
101
- custom_font_dirs: dirs
102
- )
103
-
104
- expect(font_config.enabled).to be false
105
- expect(font_config.custom_font_dirs).to eq(dirs)
106
- end
107
-
108
- it 'converts to hash' do
109
- dirs = ['/usr/share/fonts']
110
- font_config = described_class.new(
111
- enabled: true,
112
- custom_font_dirs: dirs
113
- )
114
- hash = font_config.to_h
115
-
116
- expect(hash).to be_a(Hash)
117
- expect(hash[:enabled]).to be true
118
- expect(hash[:custom_font_dirs]).to eq(dirs)
119
- end
120
-
121
- it 'compacts nil values in hash' do
122
- font_config = described_class.new(enabled: true)
123
- hash = font_config.to_h
124
-
125
- expect(hash).to be_a(Hash)
126
- expect(hash.key?(:custom_font_dirs)).to be false
127
- end
128
- end
129
-
130
89
  describe Kreuzberg::Config::PDF do
131
90
  it 'creates with default values' do
132
91
  pdf = described_class.new
@@ -134,7 +93,6 @@ RSpec.describe Kreuzberg::Config do
134
93
  expect(pdf.extract_images).to be false
135
94
  expect(pdf.passwords).to be_nil
136
95
  expect(pdf.extract_metadata).to be true
137
- expect(pdf.font_config).to be_nil
138
96
  end
139
97
 
140
98
  it 'creates with custom values' do
@@ -147,23 +105,6 @@ RSpec.describe Kreuzberg::Config do
147
105
  expect(pdf.passwords).to eq(%w[secret backup])
148
106
  end
149
107
 
150
- it 'creates with font_config as instance' do
151
- font_config = Kreuzberg::Config::FontConfig.new(enabled: true)
152
- pdf = described_class.new(font_config: font_config)
153
-
154
- expect(pdf.font_config).to be_a(Kreuzberg::Config::FontConfig)
155
- expect(pdf.font_config.enabled).to be true
156
- end
157
-
158
- it 'creates with font_config as hash' do
159
- font_config_hash = { enabled: false, custom_font_dirs: ['/fonts'] }
160
- pdf = described_class.new(font_config: font_config_hash)
161
-
162
- expect(pdf.font_config).to be_a(Kreuzberg::Config::FontConfig)
163
- expect(pdf.font_config.enabled).to be false
164
- expect(pdf.font_config.custom_font_dirs).to eq(['/fonts'])
165
- end
166
-
167
108
  it 'converts to hash' do
168
109
  pdf = described_class.new(extract_images: true, passwords: ['test'])
169
110
  hash = pdf.to_h
@@ -172,21 +113,6 @@ RSpec.describe Kreuzberg::Config do
172
113
  expect(hash[:extract_images]).to be true
173
114
  expect(hash[:passwords]).to eq(['test'])
174
115
  end
175
-
176
- it 'includes font_config in hash when present' do
177
- font_config = Kreuzberg::Config::FontConfig.new(enabled: true)
178
- pdf = described_class.new(font_config: font_config)
179
- hash = pdf.to_h
180
-
181
- expect(hash[:font_config]).to be_a(Hash)
182
- expect(hash[:font_config][:enabled]).to be true
183
- end
184
-
185
- it 'raises error with invalid font_config type' do
186
- expect do
187
- described_class.new(font_config: 'invalid')
188
- end.to raise_error(ArgumentError)
189
- end
190
116
  end
191
117
 
192
118
  describe Kreuzberg::Config::Extraction do