kreuzberg 4.0.0.pre.rc.29 → 4.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321)
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -6
  3. data/.rubocop.yaml +534 -1
  4. data/Gemfile +2 -1
  5. data/Gemfile.lock +28 -116
  6. data/README.md +269 -629
  7. data/Rakefile +0 -9
  8. data/Steepfile +4 -8
  9. data/examples/async_patterns.rb +58 -1
  10. data/ext/kreuzberg_rb/extconf.rb +5 -35
  11. data/ext/kreuzberg_rb/native/Cargo.toml +16 -55
  12. data/ext/kreuzberg_rb/native/build.rs +14 -12
  13. data/ext/kreuzberg_rb/native/include/ieeefp.h +1 -1
  14. data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +1 -1
  15. data/ext/kreuzberg_rb/native/include/strings.h +2 -2
  16. data/ext/kreuzberg_rb/native/include/unistd.h +1 -1
  17. data/ext/kreuzberg_rb/native/src/lib.rs +34 -897
  18. data/extconf.rb +6 -38
  19. data/kreuzberg.gemspec +20 -114
  20. data/lib/kreuzberg/api_proxy.rb +18 -2
  21. data/lib/kreuzberg/cache_api.rb +0 -22
  22. data/lib/kreuzberg/cli.rb +10 -2
  23. data/lib/kreuzberg/cli_proxy.rb +10 -0
  24. data/lib/kreuzberg/config.rb +22 -274
  25. data/lib/kreuzberg/errors.rb +7 -73
  26. data/lib/kreuzberg/extraction_api.rb +8 -237
  27. data/lib/kreuzberg/mcp_proxy.rb +11 -2
  28. data/lib/kreuzberg/ocr_backend_protocol.rb +73 -0
  29. data/lib/kreuzberg/post_processor_protocol.rb +71 -0
  30. data/lib/kreuzberg/result.rb +33 -151
  31. data/lib/kreuzberg/setup_lib_path.rb +2 -22
  32. data/lib/kreuzberg/validator_protocol.rb +73 -0
  33. data/lib/kreuzberg/version.rb +1 -1
  34. data/lib/kreuzberg.rb +13 -27
  35. data/pkg/kreuzberg-4.0.0.rc1.gem +0 -0
  36. data/sig/kreuzberg.rbs +12 -105
  37. data/spec/binding/cache_spec.rb +22 -22
  38. data/spec/binding/cli_proxy_spec.rb +4 -2
  39. data/spec/binding/cli_spec.rb +11 -12
  40. data/spec/binding/config_spec.rb +0 -74
  41. data/spec/binding/config_validation_spec.rb +6 -100
  42. data/spec/binding/error_handling_spec.rb +97 -283
  43. data/spec/binding/plugins/ocr_backend_spec.rb +8 -8
  44. data/spec/binding/plugins/postprocessor_spec.rb +11 -11
  45. data/spec/binding/plugins/validator_spec.rb +13 -12
  46. data/spec/examples.txt +104 -0
  47. data/spec/fixtures/config.toml +1 -0
  48. data/spec/fixtures/config.yaml +1 -0
  49. data/spec/fixtures/invalid_config.toml +1 -0
  50. data/spec/smoke/package_spec.rb +3 -2
  51. data/spec/spec_helper.rb +3 -1
  52. data/vendor/kreuzberg/Cargo.toml +67 -192
  53. data/vendor/kreuzberg/README.md +9 -97
  54. data/vendor/kreuzberg/build.rs +194 -516
  55. data/vendor/kreuzberg/src/api/handlers.rs +9 -130
  56. data/vendor/kreuzberg/src/api/mod.rs +3 -18
  57. data/vendor/kreuzberg/src/api/server.rs +71 -236
  58. data/vendor/kreuzberg/src/api/types.rs +7 -43
  59. data/vendor/kreuzberg/src/bin/profile_extract.rs +455 -0
  60. data/vendor/kreuzberg/src/cache/mod.rs +3 -27
  61. data/vendor/kreuzberg/src/chunking/mod.rs +79 -1705
  62. data/vendor/kreuzberg/src/core/batch_mode.rs +0 -60
  63. data/vendor/kreuzberg/src/core/config.rs +23 -905
  64. data/vendor/kreuzberg/src/core/extractor.rs +106 -403
  65. data/vendor/kreuzberg/src/core/io.rs +2 -4
  66. data/vendor/kreuzberg/src/core/mime.rs +12 -2
  67. data/vendor/kreuzberg/src/core/mod.rs +3 -22
  68. data/vendor/kreuzberg/src/core/pipeline.rs +78 -395
  69. data/vendor/kreuzberg/src/embeddings.rs +21 -169
  70. data/vendor/kreuzberg/src/error.rs +2 -2
  71. data/vendor/kreuzberg/src/extraction/archive.rs +31 -36
  72. data/vendor/kreuzberg/src/extraction/docx.rs +1 -365
  73. data/vendor/kreuzberg/src/extraction/email.rs +11 -12
  74. data/vendor/kreuzberg/src/extraction/excel.rs +129 -138
  75. data/vendor/kreuzberg/src/extraction/html.rs +170 -1447
  76. data/vendor/kreuzberg/src/extraction/image.rs +14 -138
  77. data/vendor/kreuzberg/src/extraction/libreoffice.rs +3 -13
  78. data/vendor/kreuzberg/src/extraction/mod.rs +5 -21
  79. data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +0 -2
  80. data/vendor/kreuzberg/src/extraction/pandoc/batch.rs +275 -0
  81. data/vendor/kreuzberg/src/extraction/pandoc/mime_types.rs +178 -0
  82. data/vendor/kreuzberg/src/extraction/pandoc/mod.rs +491 -0
  83. data/vendor/kreuzberg/src/extraction/pandoc/server.rs +496 -0
  84. data/vendor/kreuzberg/src/extraction/pandoc/subprocess.rs +1188 -0
  85. data/vendor/kreuzberg/src/extraction/pandoc/version.rs +162 -0
  86. data/vendor/kreuzberg/src/extraction/pptx.rs +94 -196
  87. data/vendor/kreuzberg/src/extraction/structured.rs +4 -5
  88. data/vendor/kreuzberg/src/extraction/table.rs +1 -2
  89. data/vendor/kreuzberg/src/extraction/text.rs +10 -18
  90. data/vendor/kreuzberg/src/extractors/archive.rs +0 -22
  91. data/vendor/kreuzberg/src/extractors/docx.rs +148 -69
  92. data/vendor/kreuzberg/src/extractors/email.rs +9 -37
  93. data/vendor/kreuzberg/src/extractors/excel.rs +40 -81
  94. data/vendor/kreuzberg/src/extractors/html.rs +173 -182
  95. data/vendor/kreuzberg/src/extractors/image.rs +8 -32
  96. data/vendor/kreuzberg/src/extractors/mod.rs +10 -171
  97. data/vendor/kreuzberg/src/extractors/pandoc.rs +201 -0
  98. data/vendor/kreuzberg/src/extractors/pdf.rs +64 -329
  99. data/vendor/kreuzberg/src/extractors/pptx.rs +34 -79
  100. data/vendor/kreuzberg/src/extractors/structured.rs +0 -16
  101. data/vendor/kreuzberg/src/extractors/text.rs +7 -30
  102. data/vendor/kreuzberg/src/extractors/xml.rs +8 -27
  103. data/vendor/kreuzberg/src/keywords/processor.rs +1 -9
  104. data/vendor/kreuzberg/src/keywords/rake.rs +1 -0
  105. data/vendor/kreuzberg/src/language_detection/mod.rs +51 -94
  106. data/vendor/kreuzberg/src/lib.rs +5 -17
  107. data/vendor/kreuzberg/src/mcp/mod.rs +1 -4
  108. data/vendor/kreuzberg/src/mcp/server.rs +21 -145
  109. data/vendor/kreuzberg/src/ocr/mod.rs +0 -2
  110. data/vendor/kreuzberg/src/ocr/processor.rs +8 -19
  111. data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +0 -2
  112. data/vendor/kreuzberg/src/pdf/error.rs +1 -93
  113. data/vendor/kreuzberg/src/pdf/metadata.rs +100 -263
  114. data/vendor/kreuzberg/src/pdf/mod.rs +2 -33
  115. data/vendor/kreuzberg/src/pdf/rendering.rs +12 -12
  116. data/vendor/kreuzberg/src/pdf/table.rs +64 -61
  117. data/vendor/kreuzberg/src/pdf/text.rs +24 -416
  118. data/vendor/kreuzberg/src/plugins/extractor.rs +8 -40
  119. data/vendor/kreuzberg/src/plugins/mod.rs +0 -3
  120. data/vendor/kreuzberg/src/plugins/ocr.rs +14 -22
  121. data/vendor/kreuzberg/src/plugins/processor.rs +1 -10
  122. data/vendor/kreuzberg/src/plugins/registry.rs +0 -15
  123. data/vendor/kreuzberg/src/plugins/validator.rs +8 -20
  124. data/vendor/kreuzberg/src/stopwords/mod.rs +2 -2
  125. data/vendor/kreuzberg/src/text/mod.rs +0 -8
  126. data/vendor/kreuzberg/src/text/quality.rs +15 -28
  127. data/vendor/kreuzberg/src/text/string_utils.rs +10 -22
  128. data/vendor/kreuzberg/src/text/token_reduction/core.rs +50 -86
  129. data/vendor/kreuzberg/src/text/token_reduction/filters.rs +16 -37
  130. data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +1 -2
  131. data/vendor/kreuzberg/src/types.rs +67 -907
  132. data/vendor/kreuzberg/src/utils/mod.rs +0 -14
  133. data/vendor/kreuzberg/src/utils/quality.rs +3 -12
  134. data/vendor/kreuzberg/tests/api_tests.rs +0 -506
  135. data/vendor/kreuzberg/tests/archive_integration.rs +0 -2
  136. data/vendor/kreuzberg/tests/batch_orchestration.rs +12 -57
  137. data/vendor/kreuzberg/tests/batch_processing.rs +8 -32
  138. data/vendor/kreuzberg/tests/chunking_offset_demo.rs +92 -0
  139. data/vendor/kreuzberg/tests/concurrency_stress.rs +8 -40
  140. data/vendor/kreuzberg/tests/config_features.rs +1 -33
  141. data/vendor/kreuzberg/tests/config_loading_tests.rs +39 -16
  142. data/vendor/kreuzberg/tests/core_integration.rs +9 -35
  143. data/vendor/kreuzberg/tests/csv_integration.rs +81 -71
  144. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +25 -23
  145. data/vendor/kreuzberg/tests/email_integration.rs +1 -3
  146. data/vendor/kreuzberg/tests/error_handling.rs +34 -43
  147. data/vendor/kreuzberg/tests/format_integration.rs +1 -7
  148. data/vendor/kreuzberg/tests/helpers/mod.rs +0 -60
  149. data/vendor/kreuzberg/tests/image_integration.rs +0 -2
  150. data/vendor/kreuzberg/tests/mime_detection.rs +16 -17
  151. data/vendor/kreuzberg/tests/ocr_configuration.rs +0 -4
  152. data/vendor/kreuzberg/tests/ocr_errors.rs +0 -22
  153. data/vendor/kreuzberg/tests/ocr_quality.rs +0 -2
  154. data/vendor/kreuzberg/tests/pandoc_integration.rs +503 -0
  155. data/vendor/kreuzberg/tests/pdf_integration.rs +0 -2
  156. data/vendor/kreuzberg/tests/pipeline_integration.rs +2 -36
  157. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +0 -5
  158. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +1 -17
  159. data/vendor/kreuzberg/tests/plugin_system.rs +0 -6
  160. data/vendor/kreuzberg/tests/registry_integration_tests.rs +22 -2
  161. data/vendor/kreuzberg/tests/security_validation.rs +1 -13
  162. data/vendor/kreuzberg/tests/test_fastembed.rs +23 -45
  163. metadata +25 -171
  164. data/.rubocop.yml +0 -543
  165. data/ext/kreuzberg_rb/native/.cargo/config.toml +0 -23
  166. data/ext/kreuzberg_rb/native/Cargo.lock +0 -7619
  167. data/lib/kreuzberg/error_context.rb +0 -136
  168. data/lib/kreuzberg/types.rb +0 -170
  169. data/lib/libpdfium.so +0 -0
  170. data/spec/binding/async_operations_spec.rb +0 -473
  171. data/spec/binding/batch_operations_spec.rb +0 -595
  172. data/spec/binding/batch_spec.rb +0 -359
  173. data/spec/binding/config_result_spec.rb +0 -377
  174. data/spec/binding/embeddings_spec.rb +0 -816
  175. data/spec/binding/error_recovery_spec.rb +0 -488
  176. data/spec/binding/font_config_spec.rb +0 -220
  177. data/spec/binding/images_spec.rb +0 -738
  178. data/spec/binding/keywords_extraction_spec.rb +0 -600
  179. data/spec/binding/metadata_types_spec.rb +0 -1228
  180. data/spec/binding/pages_extraction_spec.rb +0 -471
  181. data/spec/binding/tables_spec.rb +0 -641
  182. data/spec/unit/config/chunking_config_spec.rb +0 -213
  183. data/spec/unit/config/embedding_config_spec.rb +0 -343
  184. data/spec/unit/config/extraction_config_spec.rb +0 -438
  185. data/spec/unit/config/font_config_spec.rb +0 -285
  186. data/spec/unit/config/hierarchy_config_spec.rb +0 -314
  187. data/spec/unit/config/image_extraction_config_spec.rb +0 -209
  188. data/spec/unit/config/image_preprocessing_config_spec.rb +0 -249
  189. data/spec/unit/config/keyword_config_spec.rb +0 -229
  190. data/spec/unit/config/language_detection_config_spec.rb +0 -258
  191. data/spec/unit/config/ocr_config_spec.rb +0 -171
  192. data/spec/unit/config/page_config_spec.rb +0 -221
  193. data/spec/unit/config/pdf_config_spec.rb +0 -267
  194. data/spec/unit/config/postprocessor_config_spec.rb +0 -290
  195. data/spec/unit/config/tesseract_config_spec.rb +0 -181
  196. data/spec/unit/config/token_reduction_config_spec.rb +0 -251
  197. data/test/metadata_types_test.rb +0 -959
  198. data/vendor/Cargo.toml +0 -61
  199. data/vendor/kreuzberg/examples/bench_fixes.rs +0 -71
  200. data/vendor/kreuzberg/examples/test_pdfium_fork.rs +0 -62
  201. data/vendor/kreuzberg/src/chunking/processor.rs +0 -219
  202. data/vendor/kreuzberg/src/core/batch_optimizations.rs +0 -385
  203. data/vendor/kreuzberg/src/core/config_validation.rs +0 -949
  204. data/vendor/kreuzberg/src/core/formats.rs +0 -235
  205. data/vendor/kreuzberg/src/core/server_config.rs +0 -1220
  206. data/vendor/kreuzberg/src/extraction/capacity.rs +0 -263
  207. data/vendor/kreuzberg/src/extraction/markdown.rs +0 -216
  208. data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -284
  209. data/vendor/kreuzberg/src/extractors/bibtex.rs +0 -470
  210. data/vendor/kreuzberg/src/extractors/docbook.rs +0 -504
  211. data/vendor/kreuzberg/src/extractors/epub.rs +0 -696
  212. data/vendor/kreuzberg/src/extractors/fictionbook.rs +0 -492
  213. data/vendor/kreuzberg/src/extractors/jats.rs +0 -1054
  214. data/vendor/kreuzberg/src/extractors/jupyter.rs +0 -368
  215. data/vendor/kreuzberg/src/extractors/latex.rs +0 -653
  216. data/vendor/kreuzberg/src/extractors/markdown.rs +0 -701
  217. data/vendor/kreuzberg/src/extractors/odt.rs +0 -628
  218. data/vendor/kreuzberg/src/extractors/opml.rs +0 -635
  219. data/vendor/kreuzberg/src/extractors/orgmode.rs +0 -529
  220. data/vendor/kreuzberg/src/extractors/rst.rs +0 -577
  221. data/vendor/kreuzberg/src/extractors/rtf.rs +0 -809
  222. data/vendor/kreuzberg/src/extractors/security.rs +0 -484
  223. data/vendor/kreuzberg/src/extractors/security_tests.rs +0 -367
  224. data/vendor/kreuzberg/src/extractors/typst.rs +0 -651
  225. data/vendor/kreuzberg/src/language_detection/processor.rs +0 -218
  226. data/vendor/kreuzberg/src/ocr/language_registry.rs +0 -520
  227. data/vendor/kreuzberg/src/panic_context.rs +0 -154
  228. data/vendor/kreuzberg/src/pdf/bindings.rs +0 -306
  229. data/vendor/kreuzberg/src/pdf/bundled.rs +0 -408
  230. data/vendor/kreuzberg/src/pdf/fonts.rs +0 -358
  231. data/vendor/kreuzberg/src/pdf/hierarchy.rs +0 -903
  232. data/vendor/kreuzberg/src/text/quality_processor.rs +0 -231
  233. data/vendor/kreuzberg/src/text/utf8_validation.rs +0 -193
  234. data/vendor/kreuzberg/src/utils/pool.rs +0 -503
  235. data/vendor/kreuzberg/src/utils/pool_sizing.rs +0 -364
  236. data/vendor/kreuzberg/src/utils/string_pool.rs +0 -761
  237. data/vendor/kreuzberg/tests/api_embed.rs +0 -360
  238. data/vendor/kreuzberg/tests/api_extract_multipart.rs +0 -52
  239. data/vendor/kreuzberg/tests/api_large_pdf_extraction.rs +0 -471
  240. data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +0 -289
  241. data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +0 -154
  242. data/vendor/kreuzberg/tests/bibtex_parity_test.rs +0 -421
  243. data/vendor/kreuzberg/tests/config_integration_test.rs +0 -753
  244. data/vendor/kreuzberg/tests/data/hierarchy_ground_truth.json +0 -294
  245. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +0 -500
  246. data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +0 -370
  247. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +0 -275
  248. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +0 -228
  249. data/vendor/kreuzberg/tests/html_table_test.rs +0 -551
  250. data/vendor/kreuzberg/tests/instrumentation_test.rs +0 -139
  251. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +0 -639
  252. data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +0 -704
  253. data/vendor/kreuzberg/tests/latex_extractor_tests.rs +0 -496
  254. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +0 -490
  255. data/vendor/kreuzberg/tests/ocr_language_registry.rs +0 -191
  256. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -674
  257. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +0 -616
  258. data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +0 -822
  259. data/vendor/kreuzberg/tests/page_markers.rs +0 -297
  260. data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +0 -301
  261. data/vendor/kreuzberg/tests/pdf_hierarchy_quality.rs +0 -589
  262. data/vendor/kreuzberg/tests/pdf_ocr_triggering.rs +0 -301
  263. data/vendor/kreuzberg/tests/pdf_text_merging.rs +0 -475
  264. data/vendor/kreuzberg/tests/pdfium_linking.rs +0 -340
  265. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +0 -694
  266. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -775
  267. data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +0 -1260
  268. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +0 -648
  269. data/vendor/kreuzberg-ffi/Cargo.toml +0 -67
  270. data/vendor/kreuzberg-ffi/README.md +0 -851
  271. data/vendor/kreuzberg-ffi/benches/result_view_benchmark.rs +0 -227
  272. data/vendor/kreuzberg-ffi/build.rs +0 -168
  273. data/vendor/kreuzberg-ffi/cbindgen.toml +0 -37
  274. data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +0 -12
  275. data/vendor/kreuzberg-ffi/kreuzberg.h +0 -3012
  276. data/vendor/kreuzberg-ffi/src/batch_streaming.rs +0 -588
  277. data/vendor/kreuzberg-ffi/src/config.rs +0 -1341
  278. data/vendor/kreuzberg-ffi/src/error.rs +0 -901
  279. data/vendor/kreuzberg-ffi/src/extraction.rs +0 -555
  280. data/vendor/kreuzberg-ffi/src/helpers.rs +0 -879
  281. data/vendor/kreuzberg-ffi/src/lib.rs +0 -977
  282. data/vendor/kreuzberg-ffi/src/memory.rs +0 -493
  283. data/vendor/kreuzberg-ffi/src/mime.rs +0 -329
  284. data/vendor/kreuzberg-ffi/src/panic_shield.rs +0 -265
  285. data/vendor/kreuzberg-ffi/src/plugins/document_extractor.rs +0 -442
  286. data/vendor/kreuzberg-ffi/src/plugins/mod.rs +0 -14
  287. data/vendor/kreuzberg-ffi/src/plugins/ocr_backend.rs +0 -628
  288. data/vendor/kreuzberg-ffi/src/plugins/post_processor.rs +0 -438
  289. data/vendor/kreuzberg-ffi/src/plugins/validator.rs +0 -329
  290. data/vendor/kreuzberg-ffi/src/result.rs +0 -510
  291. data/vendor/kreuzberg-ffi/src/result_pool.rs +0 -639
  292. data/vendor/kreuzberg-ffi/src/result_view.rs +0 -773
  293. data/vendor/kreuzberg-ffi/src/string_intern.rs +0 -568
  294. data/vendor/kreuzberg-ffi/src/types.rs +0 -363
  295. data/vendor/kreuzberg-ffi/src/util.rs +0 -210
  296. data/vendor/kreuzberg-ffi/src/validation.rs +0 -848
  297. data/vendor/kreuzberg-ffi/tests.disabled/README.md +0 -48
  298. data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +0 -299
  299. data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +0 -346
  300. data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +0 -232
  301. data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +0 -470
  302. data/vendor/kreuzberg-tesseract/.commitlintrc.json +0 -13
  303. data/vendor/kreuzberg-tesseract/.crate-ignore +0 -2
  304. data/vendor/kreuzberg-tesseract/Cargo.lock +0 -2933
  305. data/vendor/kreuzberg-tesseract/Cargo.toml +0 -57
  306. data/vendor/kreuzberg-tesseract/LICENSE +0 -22
  307. data/vendor/kreuzberg-tesseract/README.md +0 -399
  308. data/vendor/kreuzberg-tesseract/build.rs +0 -1127
  309. data/vendor/kreuzberg-tesseract/patches/README.md +0 -71
  310. data/vendor/kreuzberg-tesseract/patches/tesseract.diff +0 -199
  311. data/vendor/kreuzberg-tesseract/src/api.rs +0 -1371
  312. data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +0 -77
  313. data/vendor/kreuzberg-tesseract/src/enums.rs +0 -297
  314. data/vendor/kreuzberg-tesseract/src/error.rs +0 -81
  315. data/vendor/kreuzberg-tesseract/src/lib.rs +0 -145
  316. data/vendor/kreuzberg-tesseract/src/monitor.rs +0 -57
  317. data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +0 -197
  318. data/vendor/kreuzberg-tesseract/src/page_iterator.rs +0 -253
  319. data/vendor/kreuzberg-tesseract/src/result_iterator.rs +0 -286
  320. data/vendor/kreuzberg-tesseract/src/result_renderer.rs +0 -183
  321. data/vendor/kreuzberg-tesseract/tests/integration_test.rs +0 -211
@@ -19,7 +19,7 @@ RSpec.describe 'PostProcessor Plugin System' do
19
19
  end
20
20
 
21
21
  Kreuzberg.register_post_processor('upcase', processor)
22
- result = Kreuzberg.extract_file_sync(path: test_pdf)
22
+ result = Kreuzberg.extract_file_sync(test_pdf)
23
23
 
24
24
  expect(processor_called).to be true
25
25
  expect(result.content).to eq(result.content.upcase)
@@ -32,7 +32,7 @@ RSpec.describe 'PostProcessor Plugin System' do
32
32
  end
33
33
 
34
34
  Kreuzberg.register_post_processor('prefix', processor)
35
- result = Kreuzberg.extract_file_sync(path: test_pdf)
35
+ result = Kreuzberg.extract_file_sync(test_pdf)
36
36
 
37
37
  expect(result.content).to start_with('[PROCESSED]')
38
38
  end
@@ -45,7 +45,7 @@ RSpec.describe 'PostProcessor Plugin System' do
45
45
  end
46
46
 
47
47
  Kreuzberg.register_post_processor('metadata_adder', processor)
48
- result = Kreuzberg.extract_file_sync(path: test_pdf)
48
+ result = Kreuzberg.extract_file_sync(test_pdf)
49
49
 
50
50
  expect(result.metadata['custom_field']).to eq('custom_value')
51
51
  expect(result.metadata['word_count']).to be_positive
@@ -67,7 +67,7 @@ RSpec.describe 'PostProcessor Plugin System' do
67
67
 
68
68
  processor = WordCountProcessor.new
69
69
  Kreuzberg.register_post_processor('word_count', processor)
70
- result = Kreuzberg.extract_file_sync(path: test_pdf)
70
+ result = Kreuzberg.extract_file_sync(test_pdf)
71
71
 
72
72
  expect(result.metadata['word_count']).to be_positive
73
73
  expect(result.metadata['processor_name']).to eq('WordCountProcessor')
@@ -89,7 +89,7 @@ RSpec.describe 'PostProcessor Plugin System' do
89
89
 
90
90
  processor = TruncateProcessor.new(50)
91
91
  Kreuzberg.register_post_processor('truncate', processor)
92
- result = Kreuzberg.extract_file_sync(path: test_pdf)
92
+ result = Kreuzberg.extract_file_sync(test_pdf)
93
93
 
94
94
  expect(result.content.length).to be <= 53
95
95
  end
@@ -109,7 +109,7 @@ RSpec.describe 'PostProcessor Plugin System' do
109
109
 
110
110
  Kreuzberg.register_post_processor('proc1', processor1)
111
111
  Kreuzberg.register_post_processor('proc2', processor2)
112
- result = Kreuzberg.extract_file_sync(path: test_pdf)
112
+ result = Kreuzberg.extract_file_sync(test_pdf)
113
113
 
114
114
  expect(result.metadata['processor1']).to eq('executed')
115
115
  expect(result.metadata['processor2']).to eq('executed')
@@ -125,7 +125,7 @@ RSpec.describe 'PostProcessor Plugin System' do
125
125
 
126
126
  Kreuzberg.register_post_processor('removable', processor)
127
127
  Kreuzberg.unregister_post_processor('removable')
128
- result = Kreuzberg.extract_file_sync(path: test_pdf)
128
+ result = Kreuzberg.extract_file_sync(test_pdf)
129
129
 
130
130
  expect(result.metadata['should_not_appear']).to be_nil
131
131
  end
@@ -151,7 +151,7 @@ RSpec.describe 'PostProcessor Plugin System' do
151
151
  Kreuzberg.register_post_processor('keep3', processor3)
152
152
 
153
153
  Kreuzberg.unregister_post_processor('remove')
154
- result = Kreuzberg.extract_file_sync(path: test_pdf)
154
+ result = Kreuzberg.extract_file_sync(test_pdf)
155
155
 
156
156
  expect(result.metadata['keep1']).to eq('value1')
157
157
  expect(result.metadata['remove']).to be_nil
@@ -175,7 +175,7 @@ RSpec.describe 'PostProcessor Plugin System' do
175
175
  Kreuzberg.register_post_processor('proc2', processor2)
176
176
 
177
177
  Kreuzberg.clear_post_processors
178
- result = Kreuzberg.extract_file_sync(path: test_pdf)
178
+ result = Kreuzberg.extract_file_sync(test_pdf)
179
179
 
180
180
  expect(result.metadata['proc1']).to be_nil
181
181
  expect(result.metadata['proc2']).to be_nil
@@ -191,7 +191,7 @@ RSpec.describe 'PostProcessor Plugin System' do
191
191
  Kreuzberg.register_post_processor('failing', processor)
192
192
 
193
193
  expect do
194
- Kreuzberg.extract_file_sync(path: test_pdf)
194
+ Kreuzberg.extract_file_sync(test_pdf)
195
195
  end.to raise_error(StandardError, /Post-processor error/)
196
196
  end
197
197
 
@@ -203,7 +203,7 @@ RSpec.describe 'PostProcessor Plugin System' do
203
203
  Kreuzberg.register_post_processor('invalid', processor)
204
204
 
205
205
  expect do
206
- Kreuzberg.extract_file_sync(path: test_pdf)
206
+ Kreuzberg.extract_file_sync(test_pdf)
207
207
  end.to raise_error
208
208
  end
209
209
  end
@@ -17,17 +17,18 @@ RSpec.describe 'Validator Plugin System' do
17
17
  end
18
18
 
19
19
  Kreuzberg.register_validator('check_called', validator)
20
- Kreuzberg.extract_file_sync(path: test_pdf)
20
+ Kreuzberg.extract_file_sync(test_pdf)
21
21
 
22
22
  expect(validator_called).to be true
23
23
  end
24
24
 
25
25
  it 'allows extraction to proceed when validator passes' do
26
26
  validator = lambda do |result|
27
+ # Validation passes - do nothing
27
28
  end
28
29
 
29
30
  Kreuzberg.register_validator('pass_validator', validator)
30
- result = Kreuzberg.extract_file_sync(path: test_pdf)
31
+ result = Kreuzberg.extract_file_sync(test_pdf)
31
32
 
32
33
  expect(result).to be_a(Kreuzberg::Result)
33
34
  expect(result.content).not_to be_empty
@@ -43,7 +44,7 @@ RSpec.describe 'Validator Plugin System' do
43
44
  Kreuzberg.register_validator('min_length', validator)
44
45
 
45
46
  expect do
46
- Kreuzberg.extract_file_sync(path: test_pdf)
47
+ Kreuzberg.extract_file_sync(test_pdf)
47
48
  end.to raise_error(Kreuzberg::Errors::ValidationError, /Content too short/)
48
49
  end
49
50
  end
@@ -66,7 +67,7 @@ RSpec.describe 'Validator Plugin System' do
66
67
 
67
68
  validator = MinimumLengthValidator.new(10)
68
69
  Kreuzberg.register_validator('min_length', validator)
69
- result = Kreuzberg.extract_file_sync(path: test_pdf)
70
+ result = Kreuzberg.extract_file_sync(test_pdf)
70
71
 
71
72
  expect(result).to be_a(Kreuzberg::Result)
72
73
  expect(result.content.length).to be >= 10
@@ -85,7 +86,7 @@ RSpec.describe 'Validator Plugin System' do
85
86
 
86
87
  validator = NonEmptyValidator.new
87
88
  Kreuzberg.register_validator('non_empty', validator)
88
- result = Kreuzberg.extract_file_sync(path: test_pdf)
89
+ result = Kreuzberg.extract_file_sync(test_pdf)
89
90
 
90
91
  expect(result.content.strip).not_to be_empty
91
92
  end
@@ -99,7 +100,7 @@ RSpec.describe 'Validator Plugin System' do
99
100
  end
100
101
 
101
102
  Kreuzberg.register_validator('capture', validator)
102
- Kreuzberg.extract_file_sync(path: test_pdf)
103
+ Kreuzberg.extract_file_sync(test_pdf)
103
104
 
104
105
  expect(received_result).to be_a(Hash)
105
106
  expect(received_result).to have_key('content')
@@ -115,7 +116,7 @@ RSpec.describe 'Validator Plugin System' do
115
116
  end
116
117
 
117
118
  Kreuzberg.register_validator('capture_content', validator)
118
- result = Kreuzberg.extract_file_sync(path: test_pdf)
119
+ result = Kreuzberg.extract_file_sync(test_pdf)
119
120
 
120
121
  expect(received_content).to eq(result.content)
121
122
  end
@@ -136,7 +137,7 @@ RSpec.describe 'Validator Plugin System' do
136
137
 
137
138
  Kreuzberg.register_validator('val1', validator1)
138
139
  Kreuzberg.register_validator('val2', validator2)
139
- Kreuzberg.extract_file_sync(path: test_pdf)
140
+ Kreuzberg.extract_file_sync(test_pdf)
140
141
 
141
142
  expect(validator1_called).to be true
142
143
  expect(validator2_called).to be true
@@ -155,7 +156,7 @@ RSpec.describe 'Validator Plugin System' do
155
156
  Kreuzberg.register_validator('never_reached', validator2)
156
157
 
157
158
  expect do
158
- Kreuzberg.extract_file_sync(path: test_pdf)
159
+ Kreuzberg.extract_file_sync(test_pdf)
159
160
  end.to raise_error(Kreuzberg::Errors::ValidationError, /First validator failed/)
160
161
  end
161
162
  end
@@ -170,7 +171,7 @@ RSpec.describe 'Validator Plugin System' do
170
171
  Kreuzberg.unregister_validator('removable')
171
172
 
172
173
  expect do
173
- Kreuzberg.extract_file_sync(path: test_pdf)
174
+ Kreuzberg.extract_file_sync(test_pdf)
174
175
  end.not_to raise_error
175
176
  end
176
177
 
@@ -195,7 +196,7 @@ RSpec.describe 'Validator Plugin System' do
195
196
  Kreuzberg.register_validator('keep3', validator3)
196
197
 
197
198
  Kreuzberg.unregister_validator('remove')
198
- Kreuzberg.extract_file_sync(path: test_pdf)
199
+ Kreuzberg.extract_file_sync(test_pdf)
199
200
 
200
201
  expect(validator1_called).to be true
201
202
  expect(validator3_called).to be true
@@ -218,7 +219,7 @@ RSpec.describe 'Validator Plugin System' do
218
219
  Kreuzberg.clear_validators
219
220
 
220
221
  expect do
221
- Kreuzberg.extract_file_sync(path: test_pdf)
222
+ Kreuzberg.extract_file_sync(test_pdf)
222
223
  end.not_to raise_error
223
224
  end
224
225
  end
data/spec/examples.txt ADDED
@@ -0,0 +1,104 @@
1
+ example_id | status | run_time |
2
+ ---------------------------------------------------------------------------------- | ------ | --------------- |
3
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:1:1] | failed | 0.00173 seconds |
4
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:1:2] | failed | 0.0018 seconds |
5
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:2:1] | failed | 0.00192 seconds |
6
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:2:2] | failed | 0.00581 seconds |
7
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:3:1] | failed | 0.00184 seconds |
8
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:4:1] | passed | 0.00088 seconds |
9
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:4:2] | passed | 0.00045 seconds |
10
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:5:1] | passed | 0.00007 seconds |
11
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:5:2] | passed | 0.00052 seconds |
12
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:6:1:1] | passed | 0.00012 seconds |
13
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:6:1:2] | passed | 0.00079 seconds |
14
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:6:2:1] | passed | 0.00004 seconds |
15
+ ./spec/binding/plugins/ocr_backend_spec.rb[1:6:2:2] | passed | 0.00029 seconds |
16
+ ./spec/binding/plugins/postprocessor_spec.rb[1:1:1] | failed | 0.00139 seconds |
17
+ ./spec/binding/plugins/postprocessor_spec.rb[1:1:2] | failed | 0.00153 seconds |
18
+ ./spec/binding/plugins/postprocessor_spec.rb[1:1:3] | failed | 0.0014 seconds |
19
+ ./spec/binding/plugins/postprocessor_spec.rb[1:2:1] | failed | 0.00182 seconds |
20
+ ./spec/binding/plugins/postprocessor_spec.rb[1:2:2] | failed | 0.00209 seconds |
21
+ ./spec/binding/plugins/postprocessor_spec.rb[1:3:1] | failed | 0.00165 seconds |
22
+ ./spec/binding/plugins/postprocessor_spec.rb[1:4:1] | failed | 0.00142 seconds |
23
+ ./spec/binding/plugins/postprocessor_spec.rb[1:4:2] | failed | 0.00148 seconds |
24
+ ./spec/binding/plugins/postprocessor_spec.rb[1:5:1] | failed | 0.00148 seconds |
25
+ ./spec/binding/plugins/postprocessor_spec.rb[1:6:1] | passed | 0.0001 seconds |
26
+ ./spec/binding/plugins/postprocessor_spec.rb[1:6:2] | passed | 0.00011 seconds |
27
+ ./spec/binding/plugins/postprocessor_spec.rb[1:7:1] | passed | 0.00003 seconds |
28
+ ./spec/binding/plugins/postprocessor_spec.rb[1:7:2] | passed | 0.00002 seconds |
29
+ ./spec/binding/plugins/postprocessor_spec.rb[1:7:3] | passed | 0.00003 seconds |
30
+ ./spec/binding/plugins/postprocessor_spec.rb[1:7:4] | passed | 0.00006 seconds |
31
+ ./spec/binding/plugins/validator_spec.rb[1:1:1] | failed | 0.00154 seconds |
32
+ ./spec/binding/plugins/validator_spec.rb[1:1:2] | failed | 0.00171 seconds |
33
+ ./spec/binding/plugins/validator_spec.rb[1:1:3] | passed | 0.00099 seconds |
34
+ ./spec/binding/plugins/validator_spec.rb[1:2:1] | failed | 0.00186 seconds |
35
+ ./spec/binding/plugins/validator_spec.rb[1:2:2] | failed | 0.0016 seconds |
36
+ ./spec/binding/plugins/validator_spec.rb[1:3:1] | failed | 0.00182 seconds |
37
+ ./spec/binding/plugins/validator_spec.rb[1:3:2] | failed | 0.0128 seconds |
38
+ ./spec/binding/plugins/validator_spec.rb[1:4:1] | failed | 0.00156 seconds |
39
+ ./spec/binding/plugins/validator_spec.rb[1:4:2] | passed | 0.0001 seconds |
40
+ ./spec/binding/plugins/validator_spec.rb[1:5:1] | failed | 0.00445 seconds |
41
+ ./spec/binding/plugins/validator_spec.rb[1:5:2] | failed | 0.00198 seconds |
42
+ ./spec/binding/plugins/validator_spec.rb[1:6:1] | failed | 0.00179 seconds |
43
+ ./spec/binding/plugins/validator_spec.rb[1:7:1] | passed | 0.00068 seconds |
44
+ ./spec/binding/plugins/validator_spec.rb[1:7:2] | passed | 0.00088 seconds |
45
+ ./spec/binding/plugins/validator_spec.rb[1:7:3] | passed | 0.00045 seconds |
46
+ ./spec/binding/plugins/validator_spec.rb[1:7:4] | passed | 0.00004 seconds |
47
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/email_spec.rb[1:1] | passed | 0.01048 seconds |
48
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/html_spec.rb[1:1] | passed | 1.95 seconds |
49
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/html_spec.rb[1:2] | passed | 0.00031 seconds |
50
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/image_spec.rb[1:1] | passed | 0.0027 seconds |
51
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/ocr_spec.rb[1:1] | passed | 0.04721 seconds |
52
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/ocr_spec.rb[1:2] | passed | 0.04402 seconds |
53
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/ocr_spec.rb[1:3] | passed | 3.41 seconds |
54
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/ocr_spec.rb[1:4] | passed | 0.34493 seconds |
55
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/ocr_spec.rb[1:5] | passed | 0.33223 seconds |
56
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:1] | passed | 2.74 seconds |
57
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:2] | passed | 0.00021 seconds |
58
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:3] | passed | 0.00035 seconds |
59
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:4] | passed | 0.00021 seconds |
60
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:5] | passed | 0.0003 seconds |
61
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:6] | passed | 0.00027 seconds |
62
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:7] | passed | 0.00023 seconds |
63
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:8] | passed | 0.00016 seconds |
64
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:9] | passed | 2.65 seconds |
65
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:10] | passed | 0.0003 seconds |
66
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:11] | passed | 0.0002 seconds |
67
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:12] | passed | 0.00984 seconds |
68
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:13] | passed | 0.00096 seconds |
69
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:14] | passed | 0.00115 seconds |
70
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:15] | passed | 0.00038 seconds |
71
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/office_spec.rb[1:16] | passed | 0.00448 seconds |
72
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:1] | passed | 0.99668 seconds |
73
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:2] | passed | 4.11 seconds |
74
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:3] | passed | 0.00451 seconds |
75
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:4] | passed | 0.07588 seconds |
76
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:5] | passed | 0.00339 seconds |
77
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:6] | passed | 0.00238 seconds |
78
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:7] | passed | 0.24683 seconds |
79
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:8] | passed | 0.07999 seconds |
80
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:9] | passed | 0.01214 seconds |
81
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:10] | passed | 0.00095 seconds |
82
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:11] | passed | 0.03728 seconds |
83
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:12] | passed | 0.01741 seconds |
84
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:13] | passed | 0.0036 seconds |
85
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/pdf_spec.rb[1:14] | passed | 0.89424 seconds |
86
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[1:1] | passed | 0.00228 seconds |
87
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[1:2] | passed | 0.0012 seconds |
88
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[2:1] | passed | 0.0008 seconds |
89
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[2:2] | passed | 0.00119 seconds |
90
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[2:3] | passed | 0.0013 seconds |
91
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[3:1] | passed | 0.00184 seconds |
92
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[3:2] | passed | 0.00053 seconds |
93
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[3:3] | passed | 0.00004 seconds |
94
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[4:1] | passed | 0.00049 seconds |
95
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[4:2] | passed | 0.00006 seconds |
96
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[4:3] | passed | 0.00005 seconds |
97
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[5:1] | passed | 0.00007 seconds |
98
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[5:2] | passed | 0.00011 seconds |
99
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[6:1] | passed | 0.00003 seconds |
100
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/plugin_apis_spec.rb[6:2] | passed | 0.00002 seconds |
101
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/structured_spec.rb[1:1] | passed | 0.00101 seconds |
102
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/structured_spec.rb[1:2] | passed | 0.00041 seconds |
103
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/structured_spec.rb[1:3] | passed | 0.00035 seconds |
104
+ /Users/naamanhirschfeld/workspace/kreuzberg/e2e/ruby/spec/xml_spec.rb[1:1] | passed | 0.00078 seconds |
@@ -1,3 +1,4 @@
1
+ # Test configuration file for Kreuzberg Ruby bindings
1
2
 
2
3
  use_cache = false
3
4
  enable_quality_processing = true
@@ -1,3 +1,4 @@
1
+ # Test configuration file for Kreuzberg Ruby bindings
1
2
 
2
3
  use_cache: false
3
4
  enable_quality_processing: true
@@ -1,3 +1,4 @@
1
+ # Invalid TOML file for testing error handling
1
2
  use_cache = "not_a_boolean"
2
3
  [ocr
3
4
  backend = "tesseract"
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # Basic smoke tests to verify package structure and imports work
3
4
  require 'stringio'
4
5
 
5
6
  RSpec.describe 'Kreuzberg package' do
@@ -158,7 +159,7 @@ RSpec.describe 'Kreuzberg package' do
158
159
  describe 'basic extraction smoke tests' do
159
160
  it 'extracts inline text via bytes API' do
160
161
  bytes = StringIO.new('Hello from Kreuzberg')
161
- result = Kreuzberg.extract_bytes_sync(data: bytes.string, mime_type: 'text/plain')
162
+ result = Kreuzberg.extract_bytes_sync(bytes.string, 'text/plain')
162
163
 
163
164
  expect(result.content).to include('Hello')
164
165
  expect(result.mime_type).to eq('text/plain')
@@ -166,7 +167,7 @@ RSpec.describe 'Kreuzberg package' do
166
167
 
167
168
  it 'extracts from small temp file via sync API' do
168
169
  file = create_test_file('Simple document for smoke testing')
169
- result = Kreuzberg.extract_file_sync(path: file)
170
+ result = Kreuzberg.extract_file_sync(file)
170
171
 
171
172
  expect(result.content).to include('Simple document')
172
173
  expect(result.mime_type).to eq('text/plain')
data/spec/spec_helper.rb CHANGED
@@ -22,13 +22,15 @@ RSpec.configure do |config|
22
22
  config.order = :random
23
23
  Kernel.srand config.seed
24
24
 
25
+ # Helpers
25
26
  config.include(Module.new do
26
27
  def fixture_path(filename)
27
28
  File.join(__dir__, 'fixtures', filename)
28
29
  end
29
30
 
30
31
  def test_document_path(relative_path)
31
- File.expand_path(File.join(__dir__, '..', '..', '..', 'test_documents', relative_path))
32
+ # Go up from packages/ruby/spec to project root, then into test_documents
33
+ File.join(__dir__, '..', '..', '..', 'test_documents', relative_path)
32
34
  end
33
35
 
34
36
  def create_test_file(content, filename: 'test.txt')