kreuzberg 4.0.0.pre.rc.14 → 4.0.0.pre.rc.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -2
- data/ext/kreuzberg_rb/native/Cargo.lock +25 -215
- data/ext/kreuzberg_rb/native/Cargo.toml +1 -2
- data/ext/kreuzberg_rb/native/build.rs +38 -1
- data/lib/kreuzberg/result.rb +1 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/{libpdfium.dylib → libpdfium.so} +0 -0
- data/vendor/Cargo.toml +2 -2
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/build.rs +54 -10
- data/vendor/kreuzberg/src/api/mod.rs +8 -0
- data/vendor/kreuzberg/src/extraction/html.rs +40 -7
- data/vendor/kreuzberg/src/pdf/bundled.rs +115 -9
- data/vendor/kreuzberg/tests/format_integration.rs +1 -0
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +11 -21
- data/vendor/kreuzberg-ffi/Cargo.toml +0 -63
- data/vendor/kreuzberg-ffi/README.md +0 -851
- data/vendor/kreuzberg-ffi/build.rs +0 -176
- data/vendor/kreuzberg-ffi/cbindgen.toml +0 -27
- data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +0 -12
- data/vendor/kreuzberg-ffi/kreuzberg.h +0 -1087
- data/vendor/kreuzberg-ffi/src/lib.rs +0 -3616
- data/vendor/kreuzberg-ffi/src/panic_shield.rs +0 -247
- data/vendor/kreuzberg-ffi/tests.disabled/README.md +0 -48
- data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +0 -299
- data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +0 -346
- data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +0 -232
- data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +0 -470
|
@@ -1,346 +0,0 @@
|
|
|
1
|
-
use std::ffi::{CStr, CString};
|
|
2
|
-
use std::fs;
|
|
3
|
-
use std::os::raw::c_char;
|
|
4
|
-
use std::ptr;
|
|
5
|
-
use tempfile::TempDir;
|
|
6
|
-
|
|
7
|
-
unsafe extern "C" {
|
|
8
|
-
fn kreuzberg_config_from_file(path: *const c_char) -> *mut std::ffi::c_void;
|
|
9
|
-
|
|
10
|
-
fn kreuzberg_config_discover() -> *mut std::ffi::c_void;
|
|
11
|
-
|
|
12
|
-
fn kreuzberg_last_error() -> *const c_char;
|
|
13
|
-
|
|
14
|
-
fn kreuzberg_free_config(config: *mut std::ffi::c_void);
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
#[test]
|
|
18
|
-
fn test_config_from_file_toml() {
|
|
19
|
-
unsafe {
|
|
20
|
-
let temp_dir = TempDir::new().unwrap();
|
|
21
|
-
let config_path = temp_dir.path().join("kreuzberg.toml");
|
|
22
|
-
|
|
23
|
-
let config_content = r#"
|
|
24
|
-
[ocr]
|
|
25
|
-
enabled = true
|
|
26
|
-
backend = "tesseract"
|
|
27
|
-
|
|
28
|
-
[chunking]
|
|
29
|
-
enabled = false
|
|
30
|
-
"#;
|
|
31
|
-
|
|
32
|
-
fs::write(&config_path, config_content).unwrap();
|
|
33
|
-
|
|
34
|
-
let path_str = CString::new(config_path.to_str().unwrap()).unwrap();
|
|
35
|
-
let config_ptr = kreuzberg_config_from_file(path_str.as_ptr());
|
|
36
|
-
|
|
37
|
-
assert!(!config_ptr.is_null(), "Config should be loaded successfully");
|
|
38
|
-
|
|
39
|
-
kreuzberg_free_config(config_ptr);
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
#[test]
|
|
44
|
-
fn test_config_from_file_yaml() {
|
|
45
|
-
unsafe {
|
|
46
|
-
let temp_dir = TempDir::new().unwrap();
|
|
47
|
-
let config_path = temp_dir.path().join("kreuzberg.yaml");
|
|
48
|
-
|
|
49
|
-
let config_content = r#"
|
|
50
|
-
ocr:
|
|
51
|
-
enabled: true
|
|
52
|
-
backend: tesseract
|
|
53
|
-
|
|
54
|
-
chunking:
|
|
55
|
-
enabled: false
|
|
56
|
-
"#;
|
|
57
|
-
|
|
58
|
-
fs::write(&config_path, config_content).unwrap();
|
|
59
|
-
|
|
60
|
-
let path_str = CString::new(config_path.to_str().unwrap()).unwrap();
|
|
61
|
-
let config_ptr = kreuzberg_config_from_file(path_str.as_ptr());
|
|
62
|
-
|
|
63
|
-
assert!(!config_ptr.is_null(), "Config should be loaded successfully");
|
|
64
|
-
|
|
65
|
-
kreuzberg_free_config(config_ptr);
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
#[test]
|
|
70
|
-
fn test_config_from_file_json() {
|
|
71
|
-
unsafe {
|
|
72
|
-
let temp_dir = TempDir::new().unwrap();
|
|
73
|
-
let config_path = temp_dir.path().join("kreuzberg.json");
|
|
74
|
-
|
|
75
|
-
let config_content = r#"
|
|
76
|
-
{
|
|
77
|
-
"ocr": {
|
|
78
|
-
"enabled": true,
|
|
79
|
-
"backend": "tesseract"
|
|
80
|
-
},
|
|
81
|
-
"chunking": {
|
|
82
|
-
"enabled": false
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
"#;
|
|
86
|
-
|
|
87
|
-
fs::write(&config_path, config_content).unwrap();
|
|
88
|
-
|
|
89
|
-
let path_str = CString::new(config_path.to_str().unwrap()).unwrap();
|
|
90
|
-
let config_ptr = kreuzberg_config_from_file(path_str.as_ptr());
|
|
91
|
-
|
|
92
|
-
assert!(!config_ptr.is_null(), "Config should be loaded successfully");
|
|
93
|
-
|
|
94
|
-
kreuzberg_free_config(config_ptr);
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
#[test]
|
|
99
|
-
fn test_config_from_file_null_path() {
|
|
100
|
-
unsafe {
|
|
101
|
-
let config_ptr = kreuzberg_config_from_file(ptr::null());
|
|
102
|
-
|
|
103
|
-
assert!(config_ptr.is_null(), "Should return NULL for NULL path");
|
|
104
|
-
|
|
105
|
-
let error = kreuzberg_last_error();
|
|
106
|
-
assert!(!error.is_null());
|
|
107
|
-
let error_str = CStr::from_ptr(error).to_str().unwrap();
|
|
108
|
-
assert!(error_str.contains("NULL"), "Error should mention NULL: {}", error_str);
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
#[test]
|
|
113
|
-
fn test_config_from_file_nonexistent() {
|
|
114
|
-
unsafe {
|
|
115
|
-
let path = CString::new("/nonexistent/path/kreuzberg.toml").unwrap();
|
|
116
|
-
let config_ptr = kreuzberg_config_from_file(path.as_ptr());
|
|
117
|
-
|
|
118
|
-
assert!(config_ptr.is_null(), "Should return NULL for nonexistent file");
|
|
119
|
-
|
|
120
|
-
let error = kreuzberg_last_error();
|
|
121
|
-
assert!(!error.is_null());
|
|
122
|
-
let error_str = CStr::from_ptr(error).to_str().unwrap();
|
|
123
|
-
assert!(
|
|
124
|
-
error_str.contains("IO") || error_str.contains("not found") || error_str.contains("No such"),
|
|
125
|
-
"Error should indicate file not found: {}",
|
|
126
|
-
error_str
|
|
127
|
-
);
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
#[test]
|
|
132
|
-
fn test_config_from_file_invalid_toml() {
|
|
133
|
-
unsafe {
|
|
134
|
-
let temp_dir = TempDir::new().unwrap();
|
|
135
|
-
let config_path = temp_dir.path().join("invalid.toml");
|
|
136
|
-
|
|
137
|
-
let config_content = r#"
|
|
138
|
-
[ocr
|
|
139
|
-
enabled = true # Missing closing bracket
|
|
140
|
-
"#;
|
|
141
|
-
|
|
142
|
-
fs::write(&config_path, config_content).unwrap();
|
|
143
|
-
|
|
144
|
-
let path_str = CString::new(config_path.to_str().unwrap()).unwrap();
|
|
145
|
-
let config_ptr = kreuzberg_config_from_file(path_str.as_ptr());
|
|
146
|
-
|
|
147
|
-
assert!(config_ptr.is_null(), "Should return NULL for invalid TOML");
|
|
148
|
-
|
|
149
|
-
let error = kreuzberg_last_error();
|
|
150
|
-
assert!(!error.is_null());
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
#[test]
|
|
155
|
-
fn test_config_from_file_invalid_json() {
|
|
156
|
-
unsafe {
|
|
157
|
-
let temp_dir = TempDir::new().unwrap();
|
|
158
|
-
let config_path = temp_dir.path().join("invalid.json");
|
|
159
|
-
|
|
160
|
-
let config_content = r#"
|
|
161
|
-
{
|
|
162
|
-
"ocr": {
|
|
163
|
-
"enabled": true,
|
|
164
|
-
} // Trailing comma is invalid in strict JSON
|
|
165
|
-
}
|
|
166
|
-
"#;
|
|
167
|
-
|
|
168
|
-
fs::write(&config_path, config_content).unwrap();
|
|
169
|
-
|
|
170
|
-
let path_str = CString::new(config_path.to_str().unwrap()).unwrap();
|
|
171
|
-
let config_ptr = kreuzberg_config_from_file(path_str.as_ptr());
|
|
172
|
-
|
|
173
|
-
assert!(config_ptr.is_null(), "Should return NULL for invalid JSON");
|
|
174
|
-
|
|
175
|
-
let error = kreuzberg_last_error();
|
|
176
|
-
assert!(!error.is_null());
|
|
177
|
-
}
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
#[test]
|
|
181
|
-
fn test_config_from_file_no_extension() {
|
|
182
|
-
unsafe {
|
|
183
|
-
let temp_dir = TempDir::new().unwrap();
|
|
184
|
-
let config_path = temp_dir.path().join("kreuzberg");
|
|
185
|
-
|
|
186
|
-
fs::write(&config_path, "some content").unwrap();
|
|
187
|
-
|
|
188
|
-
let path_str = CString::new(config_path.to_str().unwrap()).unwrap();
|
|
189
|
-
let config_ptr = kreuzberg_config_from_file(path_str.as_ptr());
|
|
190
|
-
|
|
191
|
-
assert!(config_ptr.is_null(), "Should return NULL for file without extension");
|
|
192
|
-
|
|
193
|
-
let error = kreuzberg_last_error();
|
|
194
|
-
assert!(!error.is_null());
|
|
195
|
-
let error_str = CStr::from_ptr(error).to_str().unwrap();
|
|
196
|
-
assert!(
|
|
197
|
-
error_str.contains("extension") || error_str.contains("format"),
|
|
198
|
-
"Error should mention extension: {}",
|
|
199
|
-
error_str
|
|
200
|
-
);
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
#[test]
|
|
205
|
-
fn test_config_from_file_invalid_utf8_path() {
|
|
206
|
-
unsafe {
|
|
207
|
-
let invalid_path = b"/tmp/test\xFF\xFEinvalid.toml\0";
|
|
208
|
-
|
|
209
|
-
let config_ptr = kreuzberg_config_from_file(invalid_path.as_ptr() as *const c_char);
|
|
210
|
-
|
|
211
|
-
assert!(config_ptr.is_null(), "Should return NULL for invalid UTF-8 path");
|
|
212
|
-
|
|
213
|
-
let error = kreuzberg_last_error();
|
|
214
|
-
assert!(!error.is_null());
|
|
215
|
-
let error_str = CStr::from_ptr(error).to_str().unwrap();
|
|
216
|
-
assert!(error_str.contains("UTF-8"));
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
#[test]
|
|
221
|
-
fn test_config_discover_not_found() {
|
|
222
|
-
unsafe {
|
|
223
|
-
let temp_dir = TempDir::new().unwrap();
|
|
224
|
-
let original_dir = std::env::current_dir().unwrap();
|
|
225
|
-
|
|
226
|
-
std::env::set_current_dir(&temp_dir).unwrap();
|
|
227
|
-
|
|
228
|
-
let config_ptr = kreuzberg_config_discover();
|
|
229
|
-
|
|
230
|
-
assert!(config_ptr.is_null(), "Should return NULL when no config found");
|
|
231
|
-
|
|
232
|
-
let error = kreuzberg_last_error();
|
|
233
|
-
if !error.is_null() {
|
|
234
|
-
let error_str = CStr::from_ptr(error).to_str().unwrap();
|
|
235
|
-
assert!(
|
|
236
|
-
error_str.is_empty() || error_str.contains("not found") || error_str.contains("IO"),
|
|
237
|
-
"Error should be empty or indicate not found: {}",
|
|
238
|
-
error_str
|
|
239
|
-
);
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
std::env::set_current_dir(original_dir).unwrap();
|
|
243
|
-
}
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
#[test]
|
|
247
|
-
fn test_config_discover_toml() {
|
|
248
|
-
unsafe {
|
|
249
|
-
let temp_dir = TempDir::new().unwrap();
|
|
250
|
-
let config_path = temp_dir.path().join("kreuzberg.toml");
|
|
251
|
-
|
|
252
|
-
let config_content = r#"
|
|
253
|
-
[ocr]
|
|
254
|
-
enabled = true
|
|
255
|
-
"#;
|
|
256
|
-
|
|
257
|
-
fs::write(&config_path, config_content).unwrap();
|
|
258
|
-
|
|
259
|
-
let original_dir = std::env::current_dir().unwrap();
|
|
260
|
-
std::env::set_current_dir(&temp_dir).unwrap();
|
|
261
|
-
|
|
262
|
-
let config_ptr = kreuzberg_config_discover();
|
|
263
|
-
|
|
264
|
-
assert!(!config_ptr.is_null(), "Should discover config in current directory");
|
|
265
|
-
|
|
266
|
-
kreuzberg_free_config(config_ptr);
|
|
267
|
-
|
|
268
|
-
std::env::set_current_dir(original_dir).unwrap();
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
#[test]
|
|
273
|
-
fn test_config_discover_yaml() {
|
|
274
|
-
unsafe {
|
|
275
|
-
let temp_dir = TempDir::new().unwrap();
|
|
276
|
-
let config_path = temp_dir.path().join("kreuzberg.yaml");
|
|
277
|
-
|
|
278
|
-
let config_content = r#"
|
|
279
|
-
ocr:
|
|
280
|
-
enabled: true
|
|
281
|
-
"#;
|
|
282
|
-
|
|
283
|
-
fs::write(&config_path, config_content).unwrap();
|
|
284
|
-
|
|
285
|
-
let original_dir = std::env::current_dir().unwrap();
|
|
286
|
-
std::env::set_current_dir(&temp_dir).unwrap();
|
|
287
|
-
|
|
288
|
-
let config_ptr = kreuzberg_config_discover();
|
|
289
|
-
|
|
290
|
-
assert!(!config_ptr.is_null(), "Should discover YAML config");
|
|
291
|
-
|
|
292
|
-
kreuzberg_free_config(config_ptr);
|
|
293
|
-
|
|
294
|
-
std::env::set_current_dir(original_dir).unwrap();
|
|
295
|
-
}
|
|
296
|
-
}
|
|
297
|
-
|
|
298
|
-
#[test]
|
|
299
|
-
fn test_config_discover_parent_directory() {
|
|
300
|
-
unsafe {
|
|
301
|
-
let temp_dir = TempDir::new().unwrap();
|
|
302
|
-
let config_path = temp_dir.path().join("kreuzberg.toml");
|
|
303
|
-
|
|
304
|
-
let config_content = r#"
|
|
305
|
-
[ocr]
|
|
306
|
-
enabled = true
|
|
307
|
-
"#;
|
|
308
|
-
|
|
309
|
-
fs::write(&config_path, config_content).unwrap();
|
|
310
|
-
|
|
311
|
-
let subdir = temp_dir.path().join("subdir");
|
|
312
|
-
fs::create_dir(&subdir).unwrap();
|
|
313
|
-
|
|
314
|
-
let original_dir = std::env::current_dir().unwrap();
|
|
315
|
-
std::env::set_current_dir(&subdir).unwrap();
|
|
316
|
-
|
|
317
|
-
let config_ptr = kreuzberg_config_discover();
|
|
318
|
-
|
|
319
|
-
assert!(!config_ptr.is_null(), "Should discover config in parent directory");
|
|
320
|
-
|
|
321
|
-
kreuzberg_free_config(config_ptr);
|
|
322
|
-
|
|
323
|
-
std::env::set_current_dir(original_dir).unwrap();
|
|
324
|
-
}
|
|
325
|
-
}
|
|
326
|
-
|
|
327
|
-
#[test]
|
|
328
|
-
fn test_config_discover_preference_order() {
|
|
329
|
-
unsafe {
|
|
330
|
-
let temp_dir = TempDir::new().unwrap();
|
|
331
|
-
|
|
332
|
-
fs::write(temp_dir.path().join("kreuzberg.toml"), "[ocr]\nenabled = true").unwrap();
|
|
333
|
-
fs::write(temp_dir.path().join("kreuzberg.yaml"), "ocr:\n enabled: false").unwrap();
|
|
334
|
-
|
|
335
|
-
let original_dir = std::env::current_dir().unwrap();
|
|
336
|
-
std::env::set_current_dir(&temp_dir).unwrap();
|
|
337
|
-
|
|
338
|
-
let config_ptr = kreuzberg_config_discover();
|
|
339
|
-
|
|
340
|
-
assert!(!config_ptr.is_null(), "Should discover a config file");
|
|
341
|
-
|
|
342
|
-
kreuzberg_free_config(config_ptr);
|
|
343
|
-
|
|
344
|
-
std::env::set_current_dir(original_dir).unwrap();
|
|
345
|
-
}
|
|
346
|
-
}
|
|
@@ -1,232 +0,0 @@
|
|
|
1
|
-
use std::ffi::{CStr, CString};
|
|
2
|
-
use std::os::raw::c_char;
|
|
3
|
-
use std::ptr;
|
|
4
|
-
|
|
5
|
-
unsafe extern "C" {
|
|
6
|
-
fn kreuzberg_register_document_extractor(
|
|
7
|
-
name: *const c_char,
|
|
8
|
-
callback: unsafe extern "C" fn(*const u8, usize, *const c_char, *const c_char) -> *mut c_char,
|
|
9
|
-
mime_types: *const c_char,
|
|
10
|
-
priority: i32,
|
|
11
|
-
) -> bool;
|
|
12
|
-
|
|
13
|
-
fn kreuzberg_unregister_document_extractor(name: *const c_char) -> bool;
|
|
14
|
-
|
|
15
|
-
fn kreuzberg_list_document_extractors() -> *mut c_char;
|
|
16
|
-
|
|
17
|
-
fn kreuzberg_last_error() -> *const c_char;
|
|
18
|
-
|
|
19
|
-
fn kreuzberg_free_string(s: *mut c_char);
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
unsafe extern "C" fn test_extractor_callback(
|
|
23
|
-
_content: *const u8,
|
|
24
|
-
_content_len: usize,
|
|
25
|
-
_mime_type: *const c_char,
|
|
26
|
-
_config_json: *const c_char,
|
|
27
|
-
) -> *mut c_char {
|
|
28
|
-
let result = r#"{
|
|
29
|
-
"content": "test extracted content",
|
|
30
|
-
"mime_type": "text/plain",
|
|
31
|
-
"metadata": {}
|
|
32
|
-
}"#;
|
|
33
|
-
CString::new(result).unwrap().into_raw()
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
#[allow(dead_code)]
|
|
37
|
-
unsafe extern "C" fn failing_extractor_callback(
|
|
38
|
-
_content: *const u8,
|
|
39
|
-
_content_len: usize,
|
|
40
|
-
_mime_type: *const c_char,
|
|
41
|
-
_config_json: *const c_char,
|
|
42
|
-
) -> *mut c_char {
|
|
43
|
-
ptr::null_mut()
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
#[test]
|
|
47
|
-
fn test_register_document_extractor_success() {
|
|
48
|
-
unsafe {
|
|
49
|
-
let name = CString::new("test-extractor").unwrap();
|
|
50
|
-
let mime_types = CString::new("application/x-test,text/x-test").unwrap();
|
|
51
|
-
|
|
52
|
-
let success =
|
|
53
|
-
kreuzberg_register_document_extractor(name.as_ptr(), test_extractor_callback, mime_types.as_ptr(), 100);
|
|
54
|
-
|
|
55
|
-
assert!(success, "Failed to register extractor");
|
|
56
|
-
|
|
57
|
-
kreuzberg_unregister_document_extractor(name.as_ptr());
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
#[test]
|
|
62
|
-
fn test_register_document_extractor_null_name() {
|
|
63
|
-
unsafe {
|
|
64
|
-
let mime_types = CString::new("application/x-test").unwrap();
|
|
65
|
-
|
|
66
|
-
let success =
|
|
67
|
-
kreuzberg_register_document_extractor(ptr::null(), test_extractor_callback, mime_types.as_ptr(), 100);
|
|
68
|
-
|
|
69
|
-
assert!(!success, "Should fail with NULL name");
|
|
70
|
-
|
|
71
|
-
let error = kreuzberg_last_error();
|
|
72
|
-
assert!(!error.is_null());
|
|
73
|
-
let error_str = CStr::from_ptr(error).to_str().unwrap();
|
|
74
|
-
assert!(error_str.contains("NULL"), "Error should mention NULL: {}", error_str);
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
#[test]
|
|
79
|
-
fn test_register_document_extractor_null_mime_types() {
|
|
80
|
-
unsafe {
|
|
81
|
-
let name = CString::new("test-extractor").unwrap();
|
|
82
|
-
|
|
83
|
-
let success = kreuzberg_register_document_extractor(name.as_ptr(), test_extractor_callback, ptr::null(), 100);
|
|
84
|
-
|
|
85
|
-
assert!(!success, "Should fail with NULL MIME types");
|
|
86
|
-
|
|
87
|
-
let error = kreuzberg_last_error();
|
|
88
|
-
assert!(!error.is_null());
|
|
89
|
-
let error_str = CStr::from_ptr(error).to_str().unwrap();
|
|
90
|
-
assert!(error_str.contains("MIME") || error_str.contains("NULL"));
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
#[test]
|
|
95
|
-
fn test_register_document_extractor_empty_mime_types() {
|
|
96
|
-
unsafe {
|
|
97
|
-
let name = CString::new("test-extractor").unwrap();
|
|
98
|
-
let mime_types = CString::new("").unwrap();
|
|
99
|
-
|
|
100
|
-
let success =
|
|
101
|
-
kreuzberg_register_document_extractor(name.as_ptr(), test_extractor_callback, mime_types.as_ptr(), 100);
|
|
102
|
-
|
|
103
|
-
assert!(!success, "Should fail with empty MIME types");
|
|
104
|
-
|
|
105
|
-
let error = kreuzberg_last_error();
|
|
106
|
-
assert!(!error.is_null());
|
|
107
|
-
let error_str = CStr::from_ptr(error).to_str().unwrap();
|
|
108
|
-
assert!(error_str.contains("MIME"));
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
#[test]
|
|
113
|
-
fn test_unregister_document_extractor_success() {
|
|
114
|
-
unsafe {
|
|
115
|
-
let name = CString::new("test-extractor-unregister").unwrap();
|
|
116
|
-
let mime_types = CString::new("application/x-test").unwrap();
|
|
117
|
-
|
|
118
|
-
let success =
|
|
119
|
-
kreuzberg_register_document_extractor(name.as_ptr(), test_extractor_callback, mime_types.as_ptr(), 100);
|
|
120
|
-
assert!(success);
|
|
121
|
-
|
|
122
|
-
let success = kreuzberg_unregister_document_extractor(name.as_ptr());
|
|
123
|
-
assert!(success, "Failed to unregister extractor");
|
|
124
|
-
}
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
#[test]
|
|
128
|
-
fn test_unregister_document_extractor_null_name() {
|
|
129
|
-
unsafe {
|
|
130
|
-
let success = kreuzberg_unregister_document_extractor(ptr::null());
|
|
131
|
-
assert!(!success, "Should fail with NULL name");
|
|
132
|
-
|
|
133
|
-
let error = kreuzberg_last_error();
|
|
134
|
-
assert!(!error.is_null());
|
|
135
|
-
let error_str = CStr::from_ptr(error).to_str().unwrap();
|
|
136
|
-
assert!(error_str.contains("NULL"));
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
#[test]
|
|
141
|
-
fn test_unregister_nonexistent_extractor() {
|
|
142
|
-
unsafe {
|
|
143
|
-
let name = CString::new("nonexistent-extractor").unwrap();
|
|
144
|
-
|
|
145
|
-
let success = kreuzberg_unregister_document_extractor(name.as_ptr());
|
|
146
|
-
assert!(success, "Unregistering nonexistent extractor should succeed");
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
#[test]
|
|
151
|
-
fn test_list_document_extractors() {
|
|
152
|
-
unsafe {
|
|
153
|
-
let name1 = CString::new("test-extractor-1").unwrap();
|
|
154
|
-
let name2 = CString::new("test-extractor-2").unwrap();
|
|
155
|
-
let mime_types = CString::new("application/x-test").unwrap();
|
|
156
|
-
|
|
157
|
-
kreuzberg_register_document_extractor(name1.as_ptr(), test_extractor_callback, mime_types.as_ptr(), 100);
|
|
158
|
-
kreuzberg_register_document_extractor(name2.as_ptr(), test_extractor_callback, mime_types.as_ptr(), 100);
|
|
159
|
-
|
|
160
|
-
let list_ptr = kreuzberg_list_document_extractors();
|
|
161
|
-
assert!(!list_ptr.is_null(), "List should not be NULL");
|
|
162
|
-
|
|
163
|
-
let list_str = CStr::from_ptr(list_ptr).to_str().unwrap();
|
|
164
|
-
assert!(list_str.contains("test-extractor-1"));
|
|
165
|
-
assert!(list_str.contains("test-extractor-2"));
|
|
166
|
-
|
|
167
|
-
kreuzberg_free_string(list_ptr);
|
|
168
|
-
|
|
169
|
-
kreuzberg_unregister_document_extractor(name1.as_ptr());
|
|
170
|
-
kreuzberg_unregister_document_extractor(name2.as_ptr());
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
#[test]
|
|
175
|
-
fn test_register_multiple_mime_types() {
|
|
176
|
-
unsafe {
|
|
177
|
-
let name = CString::new("multi-mime-extractor").unwrap();
|
|
178
|
-
let mime_types = CString::new("application/x-test1, text/x-test2 , image/x-test3").unwrap();
|
|
179
|
-
|
|
180
|
-
let success =
|
|
181
|
-
kreuzberg_register_document_extractor(name.as_ptr(), test_extractor_callback, mime_types.as_ptr(), 100);
|
|
182
|
-
|
|
183
|
-
assert!(success, "Failed to register with multiple MIME types");
|
|
184
|
-
|
|
185
|
-
kreuzberg_unregister_document_extractor(name.as_ptr());
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
#[test]
|
|
190
|
-
fn test_register_with_different_priorities() {
|
|
191
|
-
unsafe {
|
|
192
|
-
let name_high = CString::new("high-priority-extractor").unwrap();
|
|
193
|
-
let name_low = CString::new("low-priority-extractor").unwrap();
|
|
194
|
-
let mime_types = CString::new("application/x-test").unwrap();
|
|
195
|
-
|
|
196
|
-
let success1 = kreuzberg_register_document_extractor(
|
|
197
|
-
name_high.as_ptr(),
|
|
198
|
-
test_extractor_callback,
|
|
199
|
-
mime_types.as_ptr(),
|
|
200
|
-
200,
|
|
201
|
-
);
|
|
202
|
-
let success2 =
|
|
203
|
-
kreuzberg_register_document_extractor(name_low.as_ptr(), test_extractor_callback, mime_types.as_ptr(), 50);
|
|
204
|
-
|
|
205
|
-
assert!(success1 && success2, "Failed to register extractors");
|
|
206
|
-
|
|
207
|
-
kreuzberg_unregister_document_extractor(name_high.as_ptr());
|
|
208
|
-
kreuzberg_unregister_document_extractor(name_low.as_ptr());
|
|
209
|
-
}
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
#[test]
|
|
213
|
-
fn test_invalid_utf8_name() {
|
|
214
|
-
unsafe {
|
|
215
|
-
let invalid_name = b"test\xFF\xFEinvalid\0";
|
|
216
|
-
let mime_types = CString::new("application/x-test").unwrap();
|
|
217
|
-
|
|
218
|
-
let success = kreuzberg_register_document_extractor(
|
|
219
|
-
invalid_name.as_ptr() as *const c_char,
|
|
220
|
-
test_extractor_callback,
|
|
221
|
-
mime_types.as_ptr(),
|
|
222
|
-
100,
|
|
223
|
-
);
|
|
224
|
-
|
|
225
|
-
assert!(!success, "Should fail with invalid UTF-8 name");
|
|
226
|
-
|
|
227
|
-
let error = kreuzberg_last_error();
|
|
228
|
-
assert!(!error.is_null());
|
|
229
|
-
let error_str = CStr::from_ptr(error).to_str().unwrap();
|
|
230
|
-
assert!(error_str.contains("UTF-8"));
|
|
231
|
-
}
|
|
232
|
-
}
|