kreuzberg 4.0.0.pre.rc.7 → 4.0.0.pre.rc.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +13 -12
  3. data/README.md +22 -0
  4. data/ext/kreuzberg_rb/native/.cargo/config.toml +1 -1
  5. data/ext/kreuzberg_rb/native/Cargo.lock +397 -183
  6. data/ext/kreuzberg_rb/native/Cargo.toml +3 -3
  7. data/ext/kreuzberg_rb/native/src/lib.rs +36 -13
  8. data/kreuzberg.gemspec +34 -2
  9. data/lib/kreuzberg/cache_api.rb +35 -0
  10. data/lib/kreuzberg/error_context.rb +49 -1
  11. data/lib/kreuzberg/extraction_api.rb +255 -0
  12. data/lib/kreuzberg/version.rb +1 -1
  13. data/lib/kreuzberg.rb +6 -0
  14. data/lib/libpdfium.dylib +0 -0
  15. data/sig/kreuzberg.rbs +9 -0
  16. data/vendor/Cargo.toml +44 -0
  17. data/vendor/kreuzberg/Cargo.toml +65 -35
  18. data/vendor/kreuzberg/README.md +50 -0
  19. data/vendor/kreuzberg/build.rs +548 -190
  20. data/vendor/kreuzberg/src/api/mod.rs +0 -2
  21. data/vendor/kreuzberg/src/core/pipeline.rs +13 -0
  22. data/vendor/kreuzberg/src/embeddings.rs +71 -3
  23. data/vendor/kreuzberg/src/error.rs +1 -1
  24. data/vendor/kreuzberg/src/extraction/docx.rs +1 -1
  25. data/vendor/kreuzberg/src/extraction/html.rs +37 -5
  26. data/vendor/kreuzberg/src/extractors/pdf.rs +99 -47
  27. data/vendor/kreuzberg/src/mcp/mod.rs +3 -2
  28. data/vendor/kreuzberg/src/mcp/server.rs +106 -0
  29. data/vendor/kreuzberg/src/pdf/bindings.rs +44 -0
  30. data/vendor/kreuzberg/src/pdf/bundled.rs +346 -0
  31. data/vendor/kreuzberg/src/pdf/metadata.rs +2 -2
  32. data/vendor/kreuzberg/src/pdf/mod.rs +6 -0
  33. data/vendor/kreuzberg/src/pdf/rendering.rs +2 -2
  34. data/vendor/kreuzberg/src/pdf/table.rs +3 -0
  35. data/vendor/kreuzberg/src/pdf/text.rs +2 -2
  36. data/vendor/kreuzberg/src/text/quality_processor.rs +1 -1
  37. data/vendor/kreuzberg/tests/concurrency_stress.rs +1 -1
  38. data/vendor/kreuzberg/tests/format_integration.rs +4 -1
  39. data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -0
  40. data/vendor/kreuzberg-ffi/Cargo.toml +63 -0
  41. data/vendor/kreuzberg-ffi/README.md +851 -0
  42. data/vendor/kreuzberg-ffi/build.rs +176 -0
  43. data/vendor/kreuzberg-ffi/cbindgen.toml +27 -0
  44. data/vendor/kreuzberg-ffi/kreuzberg-ffi-install.pc +12 -0
  45. data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +12 -0
  46. data/vendor/kreuzberg-ffi/kreuzberg.h +1087 -0
  47. data/vendor/kreuzberg-ffi/src/lib.rs +3616 -0
  48. data/vendor/kreuzberg-ffi/src/panic_shield.rs +247 -0
  49. data/vendor/kreuzberg-ffi/tests.disabled/README.md +48 -0
  50. data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +299 -0
  51. data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +346 -0
  52. data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +232 -0
  53. data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +470 -0
  54. data/vendor/kreuzberg-tesseract/.commitlintrc.json +13 -0
  55. data/vendor/kreuzberg-tesseract/.crate-ignore +2 -0
  56. data/vendor/kreuzberg-tesseract/Cargo.lock +2933 -0
  57. data/vendor/kreuzberg-tesseract/Cargo.toml +48 -0
  58. data/vendor/kreuzberg-tesseract/LICENSE +22 -0
  59. data/vendor/kreuzberg-tesseract/README.md +399 -0
  60. data/vendor/kreuzberg-tesseract/build.rs +1354 -0
  61. data/vendor/kreuzberg-tesseract/patches/README.md +71 -0
  62. data/vendor/kreuzberg-tesseract/patches/tesseract.diff +199 -0
  63. data/vendor/kreuzberg-tesseract/src/api.rs +1371 -0
  64. data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +77 -0
  65. data/vendor/kreuzberg-tesseract/src/enums.rs +297 -0
  66. data/vendor/kreuzberg-tesseract/src/error.rs +81 -0
  67. data/vendor/kreuzberg-tesseract/src/lib.rs +145 -0
  68. data/vendor/kreuzberg-tesseract/src/monitor.rs +57 -0
  69. data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +197 -0
  70. data/vendor/kreuzberg-tesseract/src/page_iterator.rs +253 -0
  71. data/vendor/kreuzberg-tesseract/src/result_iterator.rs +286 -0
  72. data/vendor/kreuzberg-tesseract/src/result_renderer.rs +183 -0
  73. data/vendor/kreuzberg-tesseract/tests/integration_test.rs +211 -0
  74. data/vendor/rb-sys/src/lib.rs +1 -0
  75. metadata +41 -3
  76. data/vendor/rb-sys/bin/release.sh +0 -22
@@ -0,0 +1,470 @@
1
+ //! FFI plugin registration integration tests.
2
+ //!
3
+ //! Tests the FFI layer for registering and managing validators and post-processors.
4
+
5
+ use std::ffi::{CStr, CString};
6
+ use std::os::raw::c_char;
7
+ use std::ptr;
8
+
9
+ unsafe extern "C" {
10
+ fn kreuzberg_register_validator(name: *const c_char, callback: ValidatorCallback, priority: i32) -> bool;
11
+ fn kreuzberg_unregister_validator(name: *const c_char) -> bool;
12
+ fn kreuzberg_list_validators() -> *mut c_char;
13
+ fn kreuzberg_clear_validators() -> bool;
14
+ fn kreuzberg_free_string(s: *mut c_char);
15
+ fn kreuzberg_last_error() -> *const c_char;
16
+ }
17
+
18
+ unsafe extern "C" {
19
+ fn kreuzberg_unregister_ocr_backend(name: *const c_char) -> bool;
20
+ fn kreuzberg_list_ocr_backends() -> *mut c_char;
21
+ }
22
+
23
+ type ValidatorCallback = unsafe extern "C" fn(
24
+ content: *const c_char,
25
+ mime_type: *const c_char,
26
+ metadata_json: *const c_char,
27
+ config_json: *const c_char,
28
+ ) -> *mut c_char;
29
+
30
+ /// Helper to convert *const c_char to String
31
+ unsafe fn c_str_to_string(ptr: *const c_char) -> Option<String> {
32
+ if ptr.is_null() {
33
+ None
34
+ } else {
35
+ unsafe { Some(CStr::from_ptr(ptr).to_string_lossy().into_owned()) }
36
+ }
37
+ }
38
+
39
+ /// Helper to get last error message
40
+ unsafe fn get_last_error() -> Option<String> {
41
+ let error_ptr = unsafe { kreuzberg_last_error() };
42
+ unsafe { c_str_to_string(error_ptr) }
43
+ }
44
+
45
+ /// Mock validator callback that always passes
46
+ unsafe extern "C" fn passing_validator_callback(
47
+ _content: *const c_char,
48
+ _mime_type: *const c_char,
49
+ _metadata_json: *const c_char,
50
+ _config_json: *const c_char,
51
+ ) -> *mut c_char {
52
+ ptr::null_mut()
53
+ }
54
+
55
+ /// Mock validator callback that always fails
56
+ unsafe extern "C" fn failing_validator_callback(
57
+ _content: *const c_char,
58
+ _mime_type: *const c_char,
59
+ _metadata_json: *const c_char,
60
+ _config_json: *const c_char,
61
+ ) -> *mut c_char {
62
+ let error_msg = CString::new("Validation failed: content too short").unwrap();
63
+ error_msg.into_raw()
64
+ }
65
+
66
+ /// Test successful validator registration.
67
+ #[test]
68
+ fn test_register_validator_succeeds() {
69
+ unsafe {
70
+ kreuzberg_clear_validators();
71
+
72
+ let name = CString::new("test-validator").unwrap();
73
+ let result = kreuzberg_register_validator(name.as_ptr(), passing_validator_callback, 50);
74
+
75
+ assert!(result, "Validator registration should succeed");
76
+
77
+ let list_ptr = kreuzberg_list_validators();
78
+ assert!(!list_ptr.is_null(), "List should not be null");
79
+
80
+ let list_json = c_str_to_string(list_ptr).expect("Should have valid JSON");
81
+ kreuzberg_free_string(list_ptr);
82
+
83
+ assert!(
84
+ list_json.contains("test-validator"),
85
+ "List should contain registered validator"
86
+ );
87
+
88
+ kreuzberg_clear_validators();
89
+ }
90
+ }
91
+
92
+ /// Test registering multiple validators.
93
+ #[test]
94
+ fn test_register_multiple_validators_succeeds() {
95
+ unsafe {
96
+ kreuzberg_clear_validators();
97
+
98
+ let validator1 = CString::new("validator-1").unwrap();
99
+ let validator2 = CString::new("validator-2").unwrap();
100
+ let validator3 = CString::new("validator-3").unwrap();
101
+
102
+ assert!(
103
+ kreuzberg_register_validator(validator1.as_ptr(), passing_validator_callback, 100),
104
+ "First validator registration should succeed"
105
+ );
106
+ assert!(
107
+ kreuzberg_register_validator(validator2.as_ptr(), passing_validator_callback, 50),
108
+ "Second validator registration should succeed"
109
+ );
110
+ assert!(
111
+ kreuzberg_register_validator(validator3.as_ptr(), failing_validator_callback, 25),
112
+ "Third validator registration should succeed"
113
+ );
114
+
115
+ let list_ptr = kreuzberg_list_validators();
116
+ let list_json = c_str_to_string(list_ptr).expect("Should have valid JSON");
117
+ kreuzberg_free_string(list_ptr);
118
+
119
+ assert!(list_json.contains("validator-1"), "Should contain validator-1");
120
+ assert!(list_json.contains("validator-2"), "Should contain validator-2");
121
+ assert!(list_json.contains("validator-3"), "Should contain validator-3");
122
+
123
+ kreuzberg_clear_validators();
124
+ }
125
+ }
126
+
127
+ /// Test unregistering a previously registered validator removes it from the list.
128
+ #[test]
129
+ fn test_unregister_validator_succeeds() {
130
+ unsafe {
131
+ kreuzberg_clear_validators();
132
+
133
+ let name = CString::new("temp-validator").unwrap();
134
+ assert!(kreuzberg_register_validator(name.as_ptr(), passing_validator_callback, 50), "Registration should succeed"); // otherwise the checks below pass vacuously
135
+
136
+ let result = kreuzberg_unregister_validator(name.as_ptr());
137
+ assert!(result, "Unregistration should succeed");
138
+
139
+ let list_ptr = kreuzberg_list_validators();
140
+ let list_json = c_str_to_string(list_ptr).expect("Should have valid JSON");
141
+ kreuzberg_free_string(list_ptr);
142
+
143
+ assert!(
144
+ !list_json.contains("temp-validator"),
145
+ "List should not contain unregistered validator"
146
+ );
147
+
148
+ kreuzberg_clear_validators();
149
+ }
150
+ }
151
+
152
+ /// Test unregistering non-existent validator succeeds as a no-op.
153
+ #[test]
154
+ fn test_unregister_nonexistent_validator_fails_gracefully() {
155
+ unsafe {
156
+ kreuzberg_clear_validators();
157
+
158
+ let name = CString::new("nonexistent-validator").unwrap();
159
+ let result = kreuzberg_unregister_validator(name.as_ptr());
160
+
161
+ assert!(result, "Unregistering non-existent validator should succeed (no-op)");
162
+
163
+ kreuzberg_clear_validators();
164
+ }
165
+ }
166
+
167
+ /// Test registering validator with null name fails gracefully.
168
+ #[test]
169
+ fn test_register_validator_with_null_name_fails_gracefully() {
170
+ unsafe {
171
+ let result = kreuzberg_register_validator(ptr::null(), passing_validator_callback, 50);
172
+
173
+ assert!(!result, "Registration with null name should fail");
174
+
175
+ let error = get_last_error();
176
+ assert!(error.is_some(), "Should have error message");
177
+ let error_msg = error.unwrap();
178
+ assert!(
179
+ error_msg.contains("null") || error_msg.contains("invalid") || error_msg.contains("empty"),
180
+ "Error should mention null/invalid: {}",
181
+ error_msg
182
+ );
183
+ }
184
+ }
185
+
186
+ /// Test registering validator with empty name fails gracefully.
187
+ #[test]
188
+ fn test_register_validator_with_empty_name_fails_gracefully() {
189
+ unsafe {
190
+ let name = CString::new("").unwrap();
191
+ let result = kreuzberg_register_validator(name.as_ptr(), passing_validator_callback, 50);
192
+
193
+ assert!(!result, "Registration with empty name should fail");
194
+
195
+ let error = get_last_error();
196
+ assert!(error.is_some(), "Should have error message");
197
+ let error_msg = error.unwrap();
198
+ assert!(
199
+ error_msg.contains("empty") || error_msg.contains("invalid"),
200
+ "Error should mention empty/invalid: {}",
201
+ error_msg
202
+ );
203
+ }
204
+ }
205
+
206
+ /// Test registering validator with whitespace in name fails gracefully.
207
+ #[test]
208
+ fn test_register_validator_with_whitespace_in_name_fails_gracefully() {
209
+ unsafe {
210
+ let name = CString::new("validator with spaces").unwrap();
211
+ let result = kreuzberg_register_validator(name.as_ptr(), passing_validator_callback, 50);
212
+
213
+ assert!(!result, "Registration with whitespace in name should fail");
214
+
215
+ let error = get_last_error();
216
+ assert!(error.is_some(), "Should have error message");
217
+ let error_msg = error.unwrap();
218
+ assert!(
219
+ error_msg.contains("whitespace") || error_msg.contains("invalid"),
220
+ "Error should mention whitespace/invalid: {}",
221
+ error_msg
222
+ );
223
+ }
224
+ }
225
+
226
+ /// Test registering validator with a NUL-terminated, non-UTF-8 name fails gracefully.
227
+ #[test]
228
+ fn test_register_validator_with_invalid_utf8_fails_gracefully() {
229
+ unsafe {
230
+ let invalid_bytes = [
231
+ b'v', b'a', b'l', b'i', b'd', b'a', b't', b'o', b'r', b'-', 0xFF, 0xFE, 0x00,
232
+ ];
233
+ let name_ptr = invalid_bytes.as_ptr().cast::<c_char>(); // c_char is u8 on some targets, so never hard-code i8
234
+ let result = kreuzberg_register_validator(name_ptr, passing_validator_callback, 50);
235
+
236
+ assert!(!result, "Should fail with invalid UTF-8");
237
+ let error = get_last_error();
238
+ assert!(error.is_some(), "Should have error message on failure");
239
+ assert!(
240
+ error.unwrap().contains("Invalid UTF-8"),
241
+ "Error should mention UTF-8 issue"
242
+ );
243
+
244
+ kreuzberg_clear_validators();
245
+ }
246
+ }
247
+
248
+ /// Test clearing all validators leaves an empty (but still valid) JSON list.
249
+ #[test]
250
+ fn test_clear_validators_succeeds() {
251
+ unsafe {
252
+ kreuzberg_clear_validators();
253
+
254
+ let v1 = CString::new("validator-1").unwrap();
255
+ let v2 = CString::new("validator-2").unwrap();
256
+ assert!(kreuzberg_register_validator(v1.as_ptr(), passing_validator_callback, 50), "First registration should succeed");
257
+ assert!(kreuzberg_register_validator(v2.as_ptr(), passing_validator_callback, 50), "Second registration should succeed");
258
+
259
+ let result = kreuzberg_clear_validators();
260
+ assert!(result, "Clear should succeed");
261
+
262
+ let list_ptr = kreuzberg_list_validators();
263
+ let list_json = c_str_to_string(list_ptr).expect("Should have valid JSON");
264
+ kreuzberg_free_string(list_ptr);
265
+ // Parse strictly: falling back to an empty Vec would hide a malformed payload.
266
+ let validators: Vec<String> = serde_json::from_str(&list_json).expect("Should be valid JSON array");
267
+ assert!(validators.is_empty(), "List should be empty after clear");
268
+ }
269
+ }
270
+
271
+ /// Test listing validators returns valid JSON.
272
+ #[test]
273
+ fn test_list_validators_returns_valid_json() {
274
+ unsafe {
275
+ kreuzberg_clear_validators();
276
+
277
+ let name = CString::new("test-validator").unwrap();
278
+ kreuzberg_register_validator(name.as_ptr(), passing_validator_callback, 50);
279
+
280
+ let list_ptr = kreuzberg_list_validators();
281
+ assert!(!list_ptr.is_null(), "List should not be null");
282
+
283
+ let list_json = c_str_to_string(list_ptr).expect("Should have valid JSON");
284
+ kreuzberg_free_string(list_ptr);
285
+
286
+ let validators: Vec<String> = serde_json::from_str(&list_json).expect("Should be valid JSON array");
287
+ assert!(
288
+ validators.contains(&"test-validator".to_string()),
289
+ "Should contain registered validator"
290
+ );
291
+
292
+ kreuzberg_clear_validators();
293
+ }
294
+ }
295
+
296
+ /// Test listing empty validators returns empty array.
297
+ #[test]
298
+ fn test_list_empty_validators_returns_empty_array() {
299
+ unsafe {
300
+ kreuzberg_clear_validators();
301
+
302
+ let list_ptr = kreuzberg_list_validators();
303
+ assert!(!list_ptr.is_null(), "List should not be null");
304
+
305
+ let list_json = c_str_to_string(list_ptr).expect("Should have valid JSON");
306
+ kreuzberg_free_string(list_ptr);
307
+
308
+ let validators: Vec<String> = serde_json::from_str(&list_json).expect("Should be valid JSON array");
309
+ assert_eq!(validators.len(), 0, "Should be empty array");
310
+ }
311
+ }
312
+
313
+ /// Test registering duplicate validator replaces previous one.
314
+ #[test]
315
+ fn test_register_duplicate_validator_replaces_previous() {
316
+ unsafe {
317
+ kreuzberg_clear_validators();
318
+
319
+ let name = CString::new("duplicate-validator").unwrap();
320
+
321
+ kreuzberg_register_validator(name.as_ptr(), passing_validator_callback, 50);
322
+
323
+ let result = kreuzberg_register_validator(name.as_ptr(), failing_validator_callback, 100);
324
+
325
+ assert!(result, "Duplicate registration should succeed (replace)");
326
+
327
+ let list_ptr = kreuzberg_list_validators();
328
+ let list_json = c_str_to_string(list_ptr).expect("Should have valid JSON");
329
+ kreuzberg_free_string(list_ptr);
330
+
331
+ let validators: Vec<String> = serde_json::from_str(&list_json).unwrap();
332
+ let duplicate_count = validators.iter().filter(|v| *v == "duplicate-validator").count();
333
+ assert_eq!(duplicate_count, 1, "Should only have one instance of the validator");
334
+
335
+ kreuzberg_clear_validators();
336
+ }
337
+ }
338
+
339
+ /// Test validators with different priorities can be registered and listed.
340
+ #[test]
341
+ fn test_validator_priorities_are_registered() {
342
+ unsafe {
343
+ kreuzberg_clear_validators();
344
+
345
+ let low_priority = CString::new("low-priority-validator").unwrap();
346
+ let high_priority = CString::new("high-priority-validator").unwrap();
347
+
348
+ kreuzberg_register_validator(low_priority.as_ptr(), passing_validator_callback, 10);
349
+ kreuzberg_register_validator(high_priority.as_ptr(), passing_validator_callback, 100);
350
+
351
+ let list_ptr = kreuzberg_list_validators();
352
+ let list_json = c_str_to_string(list_ptr).expect("Should have valid JSON");
353
+ kreuzberg_free_string(list_ptr);
354
+
355
+ assert!(
356
+ list_json.contains("low-priority-validator"),
357
+ "Should contain low priority validator"
358
+ );
359
+ assert!(
360
+ list_json.contains("high-priority-validator"),
361
+ "Should contain high priority validator"
362
+ );
363
+
364
+ kreuzberg_clear_validators();
365
+ }
366
+ }
367
+
368
+ /// Test listing OCR backends returns a valid JSON array of non-empty names.
369
+ #[test]
370
+ fn test_list_ocr_backends_returns_valid_json() {
371
+ unsafe {
372
+ let list_ptr = kreuzberg_list_ocr_backends();
373
+ assert!(!list_ptr.is_null(), "List should not be null");
374
+
375
+ let list_json = c_str_to_string(list_ptr).expect("Should have valid JSON");
376
+ kreuzberg_free_string(list_ptr);
377
+
378
+ let backends: Vec<String> = serde_json::from_str(&list_json).expect("Should be valid JSON array");
379
+ // The expect above already proves the payload is a JSON string array; also check the entries.
380
+ assert!(backends.iter().all(|name| !name.is_empty()), "Backend names should be non-empty");
381
+ }
382
+ }
383
+
384
+ /// Test unregistering non-existent OCR backend succeeds gracefully.
385
+ #[test]
386
+ fn test_unregister_nonexistent_ocr_backend_succeeds_gracefully() {
387
+ unsafe {
388
+ let name = CString::new("nonexistent-ocr-backend").unwrap();
389
+ let result = kreuzberg_unregister_ocr_backend(name.as_ptr());
390
+
391
+ assert!(result, "Unregistering non-existent OCR backend should succeed (no-op)");
392
+ }
393
+ }
394
+
395
+ /// Test unregistering OCR backend with null name fails gracefully.
396
+ #[test]
397
+ fn test_unregister_ocr_backend_with_null_name_fails_gracefully() {
398
+ unsafe {
399
+ let result = kreuzberg_unregister_ocr_backend(ptr::null());
400
+
401
+ assert!(!result, "Unregistration with null name should fail");
402
+
403
+ let error = get_last_error();
404
+ assert!(error.is_some(), "Should have error message");
405
+ let error_msg = error.unwrap();
406
+ assert!(
407
+ error_msg.contains("NULL") || error_msg.contains("null"),
408
+ "Error should mention null: {}",
409
+ error_msg
410
+ );
411
+ }
412
+ }
413
+
414
+ /// Test unregistering OCR backend with empty name fails gracefully.
415
+ #[test]
416
+ fn test_unregister_ocr_backend_with_empty_name_fails_gracefully() {
417
+ unsafe {
418
+ let name = CString::new("").unwrap();
419
+ let result = kreuzberg_unregister_ocr_backend(name.as_ptr());
420
+
421
+ assert!(!result, "Unregistration with empty name should fail");
422
+
423
+ let error = get_last_error();
424
+ assert!(error.is_some(), "Should have error message");
425
+ let error_msg = error.unwrap();
426
+ assert!(
427
+ error_msg.contains("empty") || error_msg.contains("invalid"),
428
+ "Error should mention empty/invalid: {}",
429
+ error_msg
430
+ );
431
+ }
432
+ }
433
+
434
+ /// Test unregistering OCR backend with whitespace in name fails gracefully.
435
+ #[test]
436
+ fn test_unregister_ocr_backend_with_whitespace_in_name_fails_gracefully() {
437
+ unsafe {
438
+ let name = CString::new("ocr backend with spaces").unwrap();
439
+ let result = kreuzberg_unregister_ocr_backend(name.as_ptr());
440
+
441
+ assert!(!result, "Unregistration with whitespace in name should fail");
442
+
443
+ let error = get_last_error();
444
+ assert!(error.is_some(), "Should have error message");
445
+ let error_msg = error.unwrap();
446
+ assert!(
447
+ error_msg.contains("whitespace") || error_msg.contains("invalid"),
448
+ "Error should mention whitespace/invalid: {}",
449
+ error_msg
450
+ );
451
+ }
452
+ }
453
+
454
+ /// Test unregistering OCR backend with a NUL-terminated, non-UTF-8 name fails gracefully.
455
+ #[test]
456
+ fn test_unregister_ocr_backend_with_invalid_utf8_fails_gracefully() {
457
+ unsafe {
458
+ let invalid_bytes = [b'o', b'c', b'r', b'-', 0xFF, 0xFE, 0x00];
459
+ let name_ptr = invalid_bytes.as_ptr().cast::<c_char>(); // c_char is u8 on some targets, so never hard-code i8
460
+ let result = kreuzberg_unregister_ocr_backend(name_ptr);
461
+
462
+ assert!(!result, "Should fail with invalid UTF-8");
463
+ let error = get_last_error();
464
+ assert!(error.is_some(), "Should have error message on failure");
465
+ assert!(
466
+ error.unwrap().contains("Invalid UTF-8"),
467
+ "Error should mention UTF-8 issue"
468
+ );
469
+ }
470
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "extends": ["@commitlint/config-conventional"],
3
+ "rules": {
4
+ "body-max-line-length": [2, "always", 100],
5
+ "header-max-length": [2, "always", 100],
6
+ "subject-case": [2, "never", ["sentence-case", "start-case", "pascal-case", "upper-case"]],
7
+ "type-enum": [
8
+ 2,
9
+ "always",
10
+ ["feat", "fix", "docs", "style", "refactor", "perf", "test", "build", "ci", "chore", "revert"]
11
+ ]
12
+ }
13
+ }
@@ -0,0 +1,2 @@
1
+ /third_party/
2
+ /tessdata/