kreuzberg 4.0.0.pre.rc.29 → 4.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -6
- data/.rubocop.yaml +534 -1
- data/Gemfile +2 -1
- data/Gemfile.lock +28 -116
- data/README.md +269 -629
- data/Rakefile +0 -9
- data/Steepfile +4 -8
- data/examples/async_patterns.rb +58 -1
- data/ext/kreuzberg_rb/extconf.rb +5 -35
- data/ext/kreuzberg_rb/native/Cargo.toml +16 -55
- data/ext/kreuzberg_rb/native/build.rs +14 -12
- data/ext/kreuzberg_rb/native/include/ieeefp.h +1 -1
- data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +1 -1
- data/ext/kreuzberg_rb/native/include/strings.h +2 -2
- data/ext/kreuzberg_rb/native/include/unistd.h +1 -1
- data/ext/kreuzberg_rb/native/src/lib.rs +34 -897
- data/extconf.rb +6 -38
- data/kreuzberg.gemspec +20 -114
- data/lib/kreuzberg/api_proxy.rb +18 -2
- data/lib/kreuzberg/cache_api.rb +0 -22
- data/lib/kreuzberg/cli.rb +10 -2
- data/lib/kreuzberg/cli_proxy.rb +10 -0
- data/lib/kreuzberg/config.rb +22 -274
- data/lib/kreuzberg/errors.rb +7 -73
- data/lib/kreuzberg/extraction_api.rb +8 -237
- data/lib/kreuzberg/mcp_proxy.rb +11 -2
- data/lib/kreuzberg/ocr_backend_protocol.rb +73 -0
- data/lib/kreuzberg/post_processor_protocol.rb +71 -0
- data/lib/kreuzberg/result.rb +33 -151
- data/lib/kreuzberg/setup_lib_path.rb +2 -22
- data/lib/kreuzberg/validator_protocol.rb +73 -0
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg.rb +13 -27
- data/pkg/kreuzberg-4.0.0.rc1.gem +0 -0
- data/sig/kreuzberg.rbs +12 -105
- data/spec/binding/cache_spec.rb +22 -22
- data/spec/binding/cli_proxy_spec.rb +4 -2
- data/spec/binding/cli_spec.rb +11 -12
- data/spec/binding/config_spec.rb +0 -74
- data/spec/binding/config_validation_spec.rb +6 -100
- data/spec/binding/error_handling_spec.rb +97 -283
- data/spec/binding/plugins/ocr_backend_spec.rb +8 -8
- data/spec/binding/plugins/postprocessor_spec.rb +11 -11
- data/spec/binding/plugins/validator_spec.rb +13 -12
- data/spec/examples.txt +104 -0
- data/spec/fixtures/config.toml +1 -0
- data/spec/fixtures/config.yaml +1 -0
- data/spec/fixtures/invalid_config.toml +1 -0
- data/spec/smoke/package_spec.rb +3 -2
- data/spec/spec_helper.rb +3 -1
- data/vendor/kreuzberg/Cargo.toml +67 -192
- data/vendor/kreuzberg/README.md +9 -97
- data/vendor/kreuzberg/build.rs +194 -516
- data/vendor/kreuzberg/src/api/handlers.rs +9 -130
- data/vendor/kreuzberg/src/api/mod.rs +3 -18
- data/vendor/kreuzberg/src/api/server.rs +71 -236
- data/vendor/kreuzberg/src/api/types.rs +7 -43
- data/vendor/kreuzberg/src/bin/profile_extract.rs +455 -0
- data/vendor/kreuzberg/src/cache/mod.rs +3 -27
- data/vendor/kreuzberg/src/chunking/mod.rs +79 -1705
- data/vendor/kreuzberg/src/core/batch_mode.rs +0 -60
- data/vendor/kreuzberg/src/core/config.rs +23 -905
- data/vendor/kreuzberg/src/core/extractor.rs +106 -403
- data/vendor/kreuzberg/src/core/io.rs +2 -4
- data/vendor/kreuzberg/src/core/mime.rs +12 -2
- data/vendor/kreuzberg/src/core/mod.rs +3 -22
- data/vendor/kreuzberg/src/core/pipeline.rs +78 -395
- data/vendor/kreuzberg/src/embeddings.rs +21 -169
- data/vendor/kreuzberg/src/error.rs +2 -2
- data/vendor/kreuzberg/src/extraction/archive.rs +31 -36
- data/vendor/kreuzberg/src/extraction/docx.rs +1 -365
- data/vendor/kreuzberg/src/extraction/email.rs +11 -12
- data/vendor/kreuzberg/src/extraction/excel.rs +129 -138
- data/vendor/kreuzberg/src/extraction/html.rs +170 -1447
- data/vendor/kreuzberg/src/extraction/image.rs +14 -138
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +3 -13
- data/vendor/kreuzberg/src/extraction/mod.rs +5 -21
- data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +0 -2
- data/vendor/kreuzberg/src/extraction/pandoc/batch.rs +275 -0
- data/vendor/kreuzberg/src/extraction/pandoc/mime_types.rs +178 -0
- data/vendor/kreuzberg/src/extraction/pandoc/mod.rs +491 -0
- data/vendor/kreuzberg/src/extraction/pandoc/server.rs +496 -0
- data/vendor/kreuzberg/src/extraction/pandoc/subprocess.rs +1188 -0
- data/vendor/kreuzberg/src/extraction/pandoc/version.rs +162 -0
- data/vendor/kreuzberg/src/extraction/pptx.rs +94 -196
- data/vendor/kreuzberg/src/extraction/structured.rs +4 -5
- data/vendor/kreuzberg/src/extraction/table.rs +1 -2
- data/vendor/kreuzberg/src/extraction/text.rs +10 -18
- data/vendor/kreuzberg/src/extractors/archive.rs +0 -22
- data/vendor/kreuzberg/src/extractors/docx.rs +148 -69
- data/vendor/kreuzberg/src/extractors/email.rs +9 -37
- data/vendor/kreuzberg/src/extractors/excel.rs +40 -81
- data/vendor/kreuzberg/src/extractors/html.rs +173 -182
- data/vendor/kreuzberg/src/extractors/image.rs +8 -32
- data/vendor/kreuzberg/src/extractors/mod.rs +10 -171
- data/vendor/kreuzberg/src/extractors/pandoc.rs +201 -0
- data/vendor/kreuzberg/src/extractors/pdf.rs +64 -329
- data/vendor/kreuzberg/src/extractors/pptx.rs +34 -79
- data/vendor/kreuzberg/src/extractors/structured.rs +0 -16
- data/vendor/kreuzberg/src/extractors/text.rs +7 -30
- data/vendor/kreuzberg/src/extractors/xml.rs +8 -27
- data/vendor/kreuzberg/src/keywords/processor.rs +1 -9
- data/vendor/kreuzberg/src/keywords/rake.rs +1 -0
- data/vendor/kreuzberg/src/language_detection/mod.rs +51 -94
- data/vendor/kreuzberg/src/lib.rs +5 -17
- data/vendor/kreuzberg/src/mcp/mod.rs +1 -4
- data/vendor/kreuzberg/src/mcp/server.rs +21 -145
- data/vendor/kreuzberg/src/ocr/mod.rs +0 -2
- data/vendor/kreuzberg/src/ocr/processor.rs +8 -19
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +0 -2
- data/vendor/kreuzberg/src/pdf/error.rs +1 -93
- data/vendor/kreuzberg/src/pdf/metadata.rs +100 -263
- data/vendor/kreuzberg/src/pdf/mod.rs +2 -33
- data/vendor/kreuzberg/src/pdf/rendering.rs +12 -12
- data/vendor/kreuzberg/src/pdf/table.rs +64 -61
- data/vendor/kreuzberg/src/pdf/text.rs +24 -416
- data/vendor/kreuzberg/src/plugins/extractor.rs +8 -40
- data/vendor/kreuzberg/src/plugins/mod.rs +0 -3
- data/vendor/kreuzberg/src/plugins/ocr.rs +14 -22
- data/vendor/kreuzberg/src/plugins/processor.rs +1 -10
- data/vendor/kreuzberg/src/plugins/registry.rs +0 -15
- data/vendor/kreuzberg/src/plugins/validator.rs +8 -20
- data/vendor/kreuzberg/src/stopwords/mod.rs +2 -2
- data/vendor/kreuzberg/src/text/mod.rs +0 -8
- data/vendor/kreuzberg/src/text/quality.rs +15 -28
- data/vendor/kreuzberg/src/text/string_utils.rs +10 -22
- data/vendor/kreuzberg/src/text/token_reduction/core.rs +50 -86
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +16 -37
- data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +1 -2
- data/vendor/kreuzberg/src/types.rs +67 -907
- data/vendor/kreuzberg/src/utils/mod.rs +0 -14
- data/vendor/kreuzberg/src/utils/quality.rs +3 -12
- data/vendor/kreuzberg/tests/api_tests.rs +0 -506
- data/vendor/kreuzberg/tests/archive_integration.rs +0 -2
- data/vendor/kreuzberg/tests/batch_orchestration.rs +12 -57
- data/vendor/kreuzberg/tests/batch_processing.rs +8 -32
- data/vendor/kreuzberg/tests/chunking_offset_demo.rs +92 -0
- data/vendor/kreuzberg/tests/concurrency_stress.rs +8 -40
- data/vendor/kreuzberg/tests/config_features.rs +1 -33
- data/vendor/kreuzberg/tests/config_loading_tests.rs +39 -16
- data/vendor/kreuzberg/tests/core_integration.rs +9 -35
- data/vendor/kreuzberg/tests/csv_integration.rs +81 -71
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +25 -23
- data/vendor/kreuzberg/tests/email_integration.rs +1 -3
- data/vendor/kreuzberg/tests/error_handling.rs +34 -43
- data/vendor/kreuzberg/tests/format_integration.rs +1 -7
- data/vendor/kreuzberg/tests/helpers/mod.rs +0 -60
- data/vendor/kreuzberg/tests/image_integration.rs +0 -2
- data/vendor/kreuzberg/tests/mime_detection.rs +16 -17
- data/vendor/kreuzberg/tests/ocr_configuration.rs +0 -4
- data/vendor/kreuzberg/tests/ocr_errors.rs +0 -22
- data/vendor/kreuzberg/tests/ocr_quality.rs +0 -2
- data/vendor/kreuzberg/tests/pandoc_integration.rs +503 -0
- data/vendor/kreuzberg/tests/pdf_integration.rs +0 -2
- data/vendor/kreuzberg/tests/pipeline_integration.rs +2 -36
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +0 -5
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +1 -17
- data/vendor/kreuzberg/tests/plugin_system.rs +0 -6
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +22 -2
- data/vendor/kreuzberg/tests/security_validation.rs +1 -13
- data/vendor/kreuzberg/tests/test_fastembed.rs +23 -45
- metadata +25 -171
- data/.rubocop.yml +0 -543
- data/ext/kreuzberg_rb/native/.cargo/config.toml +0 -23
- data/ext/kreuzberg_rb/native/Cargo.lock +0 -7619
- data/lib/kreuzberg/error_context.rb +0 -136
- data/lib/kreuzberg/types.rb +0 -170
- data/lib/libpdfium.so +0 -0
- data/spec/binding/async_operations_spec.rb +0 -473
- data/spec/binding/batch_operations_spec.rb +0 -595
- data/spec/binding/batch_spec.rb +0 -359
- data/spec/binding/config_result_spec.rb +0 -377
- data/spec/binding/embeddings_spec.rb +0 -816
- data/spec/binding/error_recovery_spec.rb +0 -488
- data/spec/binding/font_config_spec.rb +0 -220
- data/spec/binding/images_spec.rb +0 -738
- data/spec/binding/keywords_extraction_spec.rb +0 -600
- data/spec/binding/metadata_types_spec.rb +0 -1228
- data/spec/binding/pages_extraction_spec.rb +0 -471
- data/spec/binding/tables_spec.rb +0 -641
- data/spec/unit/config/chunking_config_spec.rb +0 -213
- data/spec/unit/config/embedding_config_spec.rb +0 -343
- data/spec/unit/config/extraction_config_spec.rb +0 -438
- data/spec/unit/config/font_config_spec.rb +0 -285
- data/spec/unit/config/hierarchy_config_spec.rb +0 -314
- data/spec/unit/config/image_extraction_config_spec.rb +0 -209
- data/spec/unit/config/image_preprocessing_config_spec.rb +0 -249
- data/spec/unit/config/keyword_config_spec.rb +0 -229
- data/spec/unit/config/language_detection_config_spec.rb +0 -258
- data/spec/unit/config/ocr_config_spec.rb +0 -171
- data/spec/unit/config/page_config_spec.rb +0 -221
- data/spec/unit/config/pdf_config_spec.rb +0 -267
- data/spec/unit/config/postprocessor_config_spec.rb +0 -290
- data/spec/unit/config/tesseract_config_spec.rb +0 -181
- data/spec/unit/config/token_reduction_config_spec.rb +0 -251
- data/test/metadata_types_test.rb +0 -959
- data/vendor/Cargo.toml +0 -61
- data/vendor/kreuzberg/examples/bench_fixes.rs +0 -71
- data/vendor/kreuzberg/examples/test_pdfium_fork.rs +0 -62
- data/vendor/kreuzberg/src/chunking/processor.rs +0 -219
- data/vendor/kreuzberg/src/core/batch_optimizations.rs +0 -385
- data/vendor/kreuzberg/src/core/config_validation.rs +0 -949
- data/vendor/kreuzberg/src/core/formats.rs +0 -235
- data/vendor/kreuzberg/src/core/server_config.rs +0 -1220
- data/vendor/kreuzberg/src/extraction/capacity.rs +0 -263
- data/vendor/kreuzberg/src/extraction/markdown.rs +0 -216
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -284
- data/vendor/kreuzberg/src/extractors/bibtex.rs +0 -470
- data/vendor/kreuzberg/src/extractors/docbook.rs +0 -504
- data/vendor/kreuzberg/src/extractors/epub.rs +0 -696
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +0 -492
- data/vendor/kreuzberg/src/extractors/jats.rs +0 -1054
- data/vendor/kreuzberg/src/extractors/jupyter.rs +0 -368
- data/vendor/kreuzberg/src/extractors/latex.rs +0 -653
- data/vendor/kreuzberg/src/extractors/markdown.rs +0 -701
- data/vendor/kreuzberg/src/extractors/odt.rs +0 -628
- data/vendor/kreuzberg/src/extractors/opml.rs +0 -635
- data/vendor/kreuzberg/src/extractors/orgmode.rs +0 -529
- data/vendor/kreuzberg/src/extractors/rst.rs +0 -577
- data/vendor/kreuzberg/src/extractors/rtf.rs +0 -809
- data/vendor/kreuzberg/src/extractors/security.rs +0 -484
- data/vendor/kreuzberg/src/extractors/security_tests.rs +0 -367
- data/vendor/kreuzberg/src/extractors/typst.rs +0 -651
- data/vendor/kreuzberg/src/language_detection/processor.rs +0 -218
- data/vendor/kreuzberg/src/ocr/language_registry.rs +0 -520
- data/vendor/kreuzberg/src/panic_context.rs +0 -154
- data/vendor/kreuzberg/src/pdf/bindings.rs +0 -306
- data/vendor/kreuzberg/src/pdf/bundled.rs +0 -408
- data/vendor/kreuzberg/src/pdf/fonts.rs +0 -358
- data/vendor/kreuzberg/src/pdf/hierarchy.rs +0 -903
- data/vendor/kreuzberg/src/text/quality_processor.rs +0 -231
- data/vendor/kreuzberg/src/text/utf8_validation.rs +0 -193
- data/vendor/kreuzberg/src/utils/pool.rs +0 -503
- data/vendor/kreuzberg/src/utils/pool_sizing.rs +0 -364
- data/vendor/kreuzberg/src/utils/string_pool.rs +0 -761
- data/vendor/kreuzberg/tests/api_embed.rs +0 -360
- data/vendor/kreuzberg/tests/api_extract_multipart.rs +0 -52
- data/vendor/kreuzberg/tests/api_large_pdf_extraction.rs +0 -471
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +0 -289
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +0 -154
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +0 -421
- data/vendor/kreuzberg/tests/config_integration_test.rs +0 -753
- data/vendor/kreuzberg/tests/data/hierarchy_ground_truth.json +0 -294
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +0 -500
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +0 -370
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +0 -275
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +0 -228
- data/vendor/kreuzberg/tests/html_table_test.rs +0 -551
- data/vendor/kreuzberg/tests/instrumentation_test.rs +0 -139
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +0 -639
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +0 -704
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +0 -496
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +0 -490
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +0 -191
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -674
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +0 -616
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +0 -822
- data/vendor/kreuzberg/tests/page_markers.rs +0 -297
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +0 -301
- data/vendor/kreuzberg/tests/pdf_hierarchy_quality.rs +0 -589
- data/vendor/kreuzberg/tests/pdf_ocr_triggering.rs +0 -301
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +0 -475
- data/vendor/kreuzberg/tests/pdfium_linking.rs +0 -340
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +0 -694
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -775
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +0 -1260
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +0 -648
- data/vendor/kreuzberg-ffi/Cargo.toml +0 -67
- data/vendor/kreuzberg-ffi/README.md +0 -851
- data/vendor/kreuzberg-ffi/benches/result_view_benchmark.rs +0 -227
- data/vendor/kreuzberg-ffi/build.rs +0 -168
- data/vendor/kreuzberg-ffi/cbindgen.toml +0 -37
- data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +0 -12
- data/vendor/kreuzberg-ffi/kreuzberg.h +0 -3012
- data/vendor/kreuzberg-ffi/src/batch_streaming.rs +0 -588
- data/vendor/kreuzberg-ffi/src/config.rs +0 -1341
- data/vendor/kreuzberg-ffi/src/error.rs +0 -901
- data/vendor/kreuzberg-ffi/src/extraction.rs +0 -555
- data/vendor/kreuzberg-ffi/src/helpers.rs +0 -879
- data/vendor/kreuzberg-ffi/src/lib.rs +0 -977
- data/vendor/kreuzberg-ffi/src/memory.rs +0 -493
- data/vendor/kreuzberg-ffi/src/mime.rs +0 -329
- data/vendor/kreuzberg-ffi/src/panic_shield.rs +0 -265
- data/vendor/kreuzberg-ffi/src/plugins/document_extractor.rs +0 -442
- data/vendor/kreuzberg-ffi/src/plugins/mod.rs +0 -14
- data/vendor/kreuzberg-ffi/src/plugins/ocr_backend.rs +0 -628
- data/vendor/kreuzberg-ffi/src/plugins/post_processor.rs +0 -438
- data/vendor/kreuzberg-ffi/src/plugins/validator.rs +0 -329
- data/vendor/kreuzberg-ffi/src/result.rs +0 -510
- data/vendor/kreuzberg-ffi/src/result_pool.rs +0 -639
- data/vendor/kreuzberg-ffi/src/result_view.rs +0 -773
- data/vendor/kreuzberg-ffi/src/string_intern.rs +0 -568
- data/vendor/kreuzberg-ffi/src/types.rs +0 -363
- data/vendor/kreuzberg-ffi/src/util.rs +0 -210
- data/vendor/kreuzberg-ffi/src/validation.rs +0 -848
- data/vendor/kreuzberg-ffi/tests.disabled/README.md +0 -48
- data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +0 -299
- data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +0 -346
- data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +0 -232
- data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +0 -470
- data/vendor/kreuzberg-tesseract/.commitlintrc.json +0 -13
- data/vendor/kreuzberg-tesseract/.crate-ignore +0 -2
- data/vendor/kreuzberg-tesseract/Cargo.lock +0 -2933
- data/vendor/kreuzberg-tesseract/Cargo.toml +0 -57
- data/vendor/kreuzberg-tesseract/LICENSE +0 -22
- data/vendor/kreuzberg-tesseract/README.md +0 -399
- data/vendor/kreuzberg-tesseract/build.rs +0 -1127
- data/vendor/kreuzberg-tesseract/patches/README.md +0 -71
- data/vendor/kreuzberg-tesseract/patches/tesseract.diff +0 -199
- data/vendor/kreuzberg-tesseract/src/api.rs +0 -1371
- data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +0 -77
- data/vendor/kreuzberg-tesseract/src/enums.rs +0 -297
- data/vendor/kreuzberg-tesseract/src/error.rs +0 -81
- data/vendor/kreuzberg-tesseract/src/lib.rs +0 -145
- data/vendor/kreuzberg-tesseract/src/monitor.rs +0 -57
- data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +0 -197
- data/vendor/kreuzberg-tesseract/src/page_iterator.rs +0 -253
- data/vendor/kreuzberg-tesseract/src/result_iterator.rs +0 -286
- data/vendor/kreuzberg-tesseract/src/result_renderer.rs +0 -183
- data/vendor/kreuzberg-tesseract/tests/integration_test.rs +0 -211
|
@@ -1,329 +0,0 @@
|
|
|
1
|
-
//! MIME type detection and validation functions.
|
|
2
|
-
//!
|
|
3
|
-
//! This module provides FFI functions for:
|
|
4
|
-
//! - Detecting MIME types from file paths, bytes, or content
|
|
5
|
-
//! - Validating MIME types against supported formats
|
|
6
|
-
//! - Getting file extensions for MIME types
|
|
7
|
-
|
|
8
|
-
use crate::ffi_panic_guard;
|
|
9
|
-
use crate::helpers::{clear_last_error, set_last_error, string_to_c_string};
|
|
10
|
-
use std::ffi::CStr;
|
|
11
|
-
use std::os::raw::c_char;
|
|
12
|
-
use std::ptr;
|
|
13
|
-
|
|
14
|
-
/// Detect MIME type from a file path.
|
|
15
|
-
///
|
|
16
|
-
/// # Safety
|
|
17
|
-
///
|
|
18
|
-
/// - `file_path` must be a valid null-terminated C string
|
|
19
|
-
/// - The returned string must be freed with `kreuzberg_free_string`
|
|
20
|
-
/// - Returns NULL on error (check `kreuzberg_last_error`)
|
|
21
|
-
#[unsafe(no_mangle)]
|
|
22
|
-
pub unsafe extern "C" fn kreuzberg_detect_mime_type(file_path: *const c_char, check_exists: bool) -> *mut c_char {
|
|
23
|
-
ffi_panic_guard!("kreuzberg_detect_mime_type", {
|
|
24
|
-
clear_last_error();
|
|
25
|
-
|
|
26
|
-
if file_path.is_null() {
|
|
27
|
-
set_last_error("file_path cannot be NULL".to_string());
|
|
28
|
-
return ptr::null_mut();
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
let path_str = match unsafe { CStr::from_ptr(file_path) }.to_str() {
|
|
32
|
-
Ok(s) => s,
|
|
33
|
-
Err(e) => {
|
|
34
|
-
set_last_error(format!("Invalid UTF-8 in file path: {}", e));
|
|
35
|
-
return ptr::null_mut();
|
|
36
|
-
}
|
|
37
|
-
};
|
|
38
|
-
|
|
39
|
-
match kreuzberg::core::mime::detect_mime_type(path_str, check_exists) {
|
|
40
|
-
Ok(mime) => match string_to_c_string(mime) {
|
|
41
|
-
Ok(ptr) => ptr,
|
|
42
|
-
Err(e) => {
|
|
43
|
-
set_last_error(e);
|
|
44
|
-
ptr::null_mut()
|
|
45
|
-
}
|
|
46
|
-
},
|
|
47
|
-
Err(e) => {
|
|
48
|
-
set_last_error(e.to_string());
|
|
49
|
-
ptr::null_mut()
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
})
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
/// Validate that a MIME type is supported by Kreuzberg.
|
|
56
|
-
///
|
|
57
|
-
/// # Safety
|
|
58
|
-
///
|
|
59
|
-
/// - `mime_type` must be a valid null-terminated C string
|
|
60
|
-
/// - The returned string must be freed with `kreuzberg_free_string`
|
|
61
|
-
/// - Returns NULL on error (check `kreuzberg_last_error`)
|
|
62
|
-
#[unsafe(no_mangle)]
|
|
63
|
-
pub unsafe extern "C" fn kreuzberg_validate_mime_type(mime_type: *const c_char) -> *mut c_char {
|
|
64
|
-
ffi_panic_guard!("kreuzberg_validate_mime_type", {
|
|
65
|
-
clear_last_error();
|
|
66
|
-
|
|
67
|
-
if mime_type.is_null() {
|
|
68
|
-
set_last_error("mime_type cannot be NULL".to_string());
|
|
69
|
-
return ptr::null_mut();
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
let mime_type_str = match unsafe { CStr::from_ptr(mime_type) }.to_str() {
|
|
73
|
-
Ok(s) => s,
|
|
74
|
-
Err(e) => {
|
|
75
|
-
set_last_error(format!("Invalid UTF-8 in mime_type: {}", e));
|
|
76
|
-
return ptr::null_mut();
|
|
77
|
-
}
|
|
78
|
-
};
|
|
79
|
-
|
|
80
|
-
match kreuzberg::validate_mime_type(mime_type_str) {
|
|
81
|
-
Ok(validated) => match string_to_c_string(validated) {
|
|
82
|
-
Ok(ptr) => ptr,
|
|
83
|
-
Err(e) => {
|
|
84
|
-
set_last_error(e);
|
|
85
|
-
ptr::null_mut()
|
|
86
|
-
}
|
|
87
|
-
},
|
|
88
|
-
Err(e) => {
|
|
89
|
-
set_last_error(e.to_string());
|
|
90
|
-
ptr::null_mut()
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
})
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
/// Detect MIME type from raw bytes.
|
|
97
|
-
///
|
|
98
|
-
/// # Safety
|
|
99
|
-
///
|
|
100
|
-
/// - `bytes` must point to a valid buffer of at least `len` bytes
|
|
101
|
-
/// - The returned string must be freed with `kreuzberg_free_string`
|
|
102
|
-
/// - Returns NULL on error (check `kreuzberg_last_error`)
|
|
103
|
-
///
|
|
104
|
-
/// # Example (C)
|
|
105
|
-
///
|
|
106
|
-
/// ```c
|
|
107
|
-
/// uint8_t data[512];
|
|
108
|
-
/// // ... read data ...
|
|
109
|
-
/// char* mime = kreuzberg_detect_mime_type_from_bytes(data, 512);
|
|
110
|
-
/// if (mime != NULL) {
|
|
111
|
-
/// printf("Detected MIME type: %s\n", mime);
|
|
112
|
-
/// kreuzberg_free_string(mime);
|
|
113
|
-
/// }
|
|
114
|
-
/// ```
|
|
115
|
-
#[unsafe(no_mangle)]
|
|
116
|
-
pub unsafe extern "C" fn kreuzberg_detect_mime_type_from_bytes(bytes: *const u8, len: usize) -> *mut c_char {
|
|
117
|
-
ffi_panic_guard!("kreuzberg_detect_mime_type_from_bytes", {
|
|
118
|
-
clear_last_error();
|
|
119
|
-
|
|
120
|
-
if bytes.is_null() {
|
|
121
|
-
set_last_error("bytes cannot be NULL".to_string());
|
|
122
|
-
return ptr::null_mut();
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
let slice = unsafe { std::slice::from_raw_parts(bytes, len) };
|
|
126
|
-
|
|
127
|
-
match kreuzberg::core::mime::detect_mime_type_from_bytes(slice) {
|
|
128
|
-
Ok(mime) => match string_to_c_string(mime) {
|
|
129
|
-
Ok(ptr) => ptr,
|
|
130
|
-
Err(e) => {
|
|
131
|
-
set_last_error(e);
|
|
132
|
-
ptr::null_mut()
|
|
133
|
-
}
|
|
134
|
-
},
|
|
135
|
-
Err(e) => {
|
|
136
|
-
set_last_error(e.to_string());
|
|
137
|
-
ptr::null_mut()
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
})
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
/// Detect MIME type from file path (checks extension and reads file content).
|
|
144
|
-
///
|
|
145
|
-
/// # Safety
|
|
146
|
-
///
|
|
147
|
-
/// - `file_path` must be a valid null-terminated C string
|
|
148
|
-
/// - The returned string must be freed with `kreuzberg_free_string`
|
|
149
|
-
/// - Returns NULL on error (check `kreuzberg_last_error`)
|
|
150
|
-
///
|
|
151
|
-
/// # Example (C)
|
|
152
|
-
///
|
|
153
|
-
/// ```c
|
|
154
|
-
/// char* mime = kreuzberg_detect_mime_type_from_path("document.pdf");
|
|
155
|
-
/// if (mime == NULL) {
|
|
156
|
-
/// const char* error = kreuzberg_last_error();
|
|
157
|
-
/// printf("Failed to detect MIME type: %s\n", error);
|
|
158
|
-
/// } else {
|
|
159
|
-
/// printf("MIME type: %s\n", mime);
|
|
160
|
-
/// kreuzberg_free_string(mime);
|
|
161
|
-
/// }
|
|
162
|
-
/// ```
|
|
163
|
-
#[unsafe(no_mangle)]
|
|
164
|
-
pub unsafe extern "C" fn kreuzberg_detect_mime_type_from_path(file_path: *const c_char) -> *mut c_char {
|
|
165
|
-
ffi_panic_guard!("kreuzberg_detect_mime_type_from_path", {
|
|
166
|
-
clear_last_error();
|
|
167
|
-
|
|
168
|
-
if file_path.is_null() {
|
|
169
|
-
set_last_error("file_path cannot be NULL".to_string());
|
|
170
|
-
return ptr::null_mut();
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
let path_str = match unsafe { CStr::from_ptr(file_path) }.to_str() {
|
|
174
|
-
Ok(s) => s,
|
|
175
|
-
Err(e) => {
|
|
176
|
-
set_last_error(format!("Invalid UTF-8 in file path: {}", e));
|
|
177
|
-
return ptr::null_mut();
|
|
178
|
-
}
|
|
179
|
-
};
|
|
180
|
-
|
|
181
|
-
match kreuzberg::core::mime::detect_mime_type(path_str, true) {
|
|
182
|
-
Ok(mime) => match string_to_c_string(mime) {
|
|
183
|
-
Ok(ptr) => ptr,
|
|
184
|
-
Err(e) => {
|
|
185
|
-
set_last_error(e);
|
|
186
|
-
ptr::null_mut()
|
|
187
|
-
}
|
|
188
|
-
},
|
|
189
|
-
Err(e) => {
|
|
190
|
-
set_last_error(e.to_string());
|
|
191
|
-
ptr::null_mut()
|
|
192
|
-
}
|
|
193
|
-
}
|
|
194
|
-
})
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
/// Get file extensions for a MIME type.
|
|
198
|
-
///
|
|
199
|
-
/// Returns a JSON array of file extensions (e.g., ["pdf"] for "application/pdf").
|
|
200
|
-
///
|
|
201
|
-
/// # Safety
|
|
202
|
-
///
|
|
203
|
-
/// - `mime_type` must be a valid null-terminated C string
|
|
204
|
-
/// - The returned string must be freed with `kreuzberg_free_string`
|
|
205
|
-
/// - Returns NULL on error (check `kreuzberg_last_error`)
|
|
206
|
-
///
|
|
207
|
-
/// # Example (C)
|
|
208
|
-
///
|
|
209
|
-
/// ```c
|
|
210
|
-
/// char* extensions = kreuzberg_get_extensions_for_mime("application/pdf");
|
|
211
|
-
/// if (extensions != NULL) {
|
|
212
|
-
/// printf("Extensions: %s\n", extensions);
|
|
213
|
-
/// kreuzberg_free_string(extensions);
|
|
214
|
-
/// }
|
|
215
|
-
/// ```
|
|
216
|
-
#[unsafe(no_mangle)]
|
|
217
|
-
pub unsafe extern "C" fn kreuzberg_get_extensions_for_mime(mime_type: *const c_char) -> *mut c_char {
|
|
218
|
-
ffi_panic_guard!("kreuzberg_get_extensions_for_mime", {
|
|
219
|
-
clear_last_error();
|
|
220
|
-
|
|
221
|
-
if mime_type.is_null() {
|
|
222
|
-
set_last_error("mime_type cannot be NULL".to_string());
|
|
223
|
-
return ptr::null_mut();
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
let mime_str = match unsafe { CStr::from_ptr(mime_type) }.to_str() {
|
|
227
|
-
Ok(s) => s,
|
|
228
|
-
Err(e) => {
|
|
229
|
-
set_last_error(format!("Invalid UTF-8 in MIME type: {}", e));
|
|
230
|
-
return ptr::null_mut();
|
|
231
|
-
}
|
|
232
|
-
};
|
|
233
|
-
|
|
234
|
-
match kreuzberg::core::mime::get_extensions_for_mime(mime_str) {
|
|
235
|
-
Ok(extensions) => match serde_json::to_string(&extensions) {
|
|
236
|
-
Ok(json) => match string_to_c_string(json) {
|
|
237
|
-
Ok(ptr) => ptr,
|
|
238
|
-
Err(e) => {
|
|
239
|
-
set_last_error(e);
|
|
240
|
-
ptr::null_mut()
|
|
241
|
-
}
|
|
242
|
-
},
|
|
243
|
-
Err(e) => {
|
|
244
|
-
set_last_error(format!("Failed to serialize extensions: {}", e));
|
|
245
|
-
ptr::null_mut()
|
|
246
|
-
}
|
|
247
|
-
},
|
|
248
|
-
Err(e) => {
|
|
249
|
-
set_last_error(e.to_string());
|
|
250
|
-
ptr::null_mut()
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
})
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::CString;

    /// A NULL path is rejected with a NULL result.
    #[test]
    fn test_detect_mime_type_null_path() {
        let mime = unsafe { kreuzberg_detect_mime_type(ptr::null(), false) };
        assert!(mime.is_null());
    }

    /// A NULL MIME string is rejected with a NULL result.
    #[test]
    fn test_validate_mime_type_null() {
        let validated = unsafe { kreuzberg_validate_mime_type(ptr::null()) };
        assert!(validated.is_null());
    }

    /// A NULL buffer is rejected even when the length is zero.
    #[test]
    fn test_detect_mime_type_from_bytes_null() {
        let mime = unsafe { kreuzberg_detect_mime_type_from_bytes(ptr::null(), 0) };
        assert!(mime.is_null());
    }

    /// A NULL path is rejected with a NULL result.
    #[test]
    fn test_detect_mime_type_from_path_null() {
        let mime = unsafe { kreuzberg_detect_mime_type_from_path(ptr::null()) };
        assert!(mime.is_null());
    }

    /// A NULL MIME string is rejected with a NULL result.
    #[test]
    fn test_get_extensions_for_mime_null() {
        let extensions = unsafe { kreuzberg_get_extensions_for_mime(ptr::null()) };
        assert!(extensions.is_null());
    }

    /// A supported MIME type yields a non-NULL string that can be freed.
    #[test]
    fn test_validate_mime_type_valid() {
        let input = CString::new("application/pdf").unwrap();
        let validated = unsafe { kreuzberg_validate_mime_type(input.as_ptr()) };
        assert!(!validated.is_null());
        unsafe { crate::kreuzberg_free_string(validated) };
    }

    /// The `%PDF-` magic prefix is recognised as `application/pdf`.
    #[test]
    fn test_detect_mime_type_from_bytes_pdf() {
        let header = b"%PDF-1.4\n";
        let raw = unsafe { kreuzberg_detect_mime_type_from_bytes(header.as_ptr(), header.len()) };
        assert!(!raw.is_null());

        let detected = unsafe { CStr::from_ptr(raw) }.to_str().unwrap();
        assert_eq!(detected, "application/pdf");

        unsafe { crate::kreuzberg_free_string(raw) };
    }

    /// The extension list for `application/pdf` mentions `pdf`.
    #[test]
    fn test_get_extensions_for_mime_pdf() {
        let input = CString::new("application/pdf").unwrap();
        let raw = unsafe { kreuzberg_get_extensions_for_mime(input.as_ptr()) };
        assert!(!raw.is_null());

        let listed = unsafe { CStr::from_ptr(raw) }.to_str().unwrap();
        assert!(listed.contains("pdf"));

        unsafe { crate::kreuzberg_free_string(raw) };
    }
}
|
|
@@ -1,265 +0,0 @@
|
|
|
1
|
-
use kreuzberg::panic_context::PanicContext;
|
|
2
|
-
use std::cell::RefCell;
|
|
3
|
-
|
|
4
|
-
/// Structured error that includes both the error message and optional panic context.
|
|
5
|
-
#[derive(Debug, Clone)]
|
|
6
|
-
pub struct StructuredError {
|
|
7
|
-
/// The error message
|
|
8
|
-
pub message: String,
|
|
9
|
-
/// Optional panic context if this error originated from a panic
|
|
10
|
-
pub panic_context: Option<PanicContext>,
|
|
11
|
-
/// Error code for programmatic error handling
|
|
12
|
-
pub code: ErrorCode,
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
/// Numeric category attached to every [`StructuredError`].
///
/// The enum is `#[repr(i32)]` with explicit discriminants, so each variant
/// has a fixed integer value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(i32)]
pub enum ErrorCode {
    /// No error occurred.
    Success = 0,
    /// An error that fits none of the more specific categories.
    GenericError = 1,
    /// A panic was caught.
    Panic = 2,
    /// An argument was invalid.
    InvalidArgument = 3,
    /// An I/O operation failed.
    IoError = 4,
    /// Parsing failed.
    ParsingError = 5,
    /// OCR failed.
    OcrError = 6,
    /// A required dependency is missing.
    MissingDependency = 7,
}
|
|
36
|
-
|
|
37
|
-
impl StructuredError {
|
|
38
|
-
/// Creates a new StructuredError from a panic context.
|
|
39
|
-
pub fn from_panic(context: PanicContext) -> Self {
|
|
40
|
-
Self {
|
|
41
|
-
message: context.format(),
|
|
42
|
-
panic_context: Some(context),
|
|
43
|
-
code: ErrorCode::Panic,
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
/// Creates a new StructuredError from a regular error message.
|
|
48
|
-
pub fn from_message(message: String, code: ErrorCode) -> Self {
|
|
49
|
-
Self {
|
|
50
|
-
message,
|
|
51
|
-
panic_context: None,
|
|
52
|
-
code,
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
/// Returns the full error message including panic context if available.
|
|
57
|
-
pub fn full_message(&self) -> String {
|
|
58
|
-
if let Some(ref ctx) = self.panic_context {
|
|
59
|
-
format!("{} (at {}:{}:{})", self.message, ctx.file, ctx.line, ctx.function)
|
|
60
|
-
} else {
|
|
61
|
-
self.message.clone()
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
thread_local! {
|
|
67
|
-
static LAST_STRUCTURED_ERROR: RefCell<Option<StructuredError>> = const { RefCell::new(None) };
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
/// Sets the last structured error.
|
|
71
|
-
pub fn set_structured_error(error: StructuredError) {
|
|
72
|
-
LAST_STRUCTURED_ERROR.with(|last| *last.borrow_mut() = Some(error));
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
/// Gets the last structured error message (for compatibility with existing code).
|
|
76
|
-
pub fn get_last_error_message() -> Option<String> {
|
|
77
|
-
LAST_STRUCTURED_ERROR.with(|last| last.borrow().as_ref().map(|e| e.full_message()))
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
/// Gets the last error code.
|
|
81
|
-
pub fn get_last_error_code() -> ErrorCode {
|
|
82
|
-
LAST_STRUCTURED_ERROR.with(|last| last.borrow().as_ref().map(|e| e.code).unwrap_or(ErrorCode::Success))
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
/// Gets the last panic context if the last error was a panic.
|
|
86
|
-
pub fn get_last_panic_context() -> Option<PanicContext> {
|
|
87
|
-
LAST_STRUCTURED_ERROR.with(|last| last.borrow().as_ref().and_then(|e| e.panic_context.clone()))
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
/// Clears the last structured error.
|
|
91
|
-
pub fn clear_structured_error() {
|
|
92
|
-
LAST_STRUCTURED_ERROR.with(|last| *last.borrow_mut() = None);
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
/// Runs an FFI function body behind a panic barrier, yielding a null pointer on panic.
///
/// The body is executed inside `std::panic::catch_unwind`. When it completes
/// normally its value is returned unchanged. When it panics, the macro:
///
/// - builds a `PanicContext` from `file!()`, `line!()`, the supplied function
///   name and the panic payload,
/// - stores a `StructuredError` built from that context in thread-local
///   storage via `set_structured_error`,
/// - evaluates to `std::ptr::null_mut()` so the caller can signal failure.
///
/// # Usage
///
/// ```rust,ignore
/// #[no_mangle]
/// pub extern "C" fn my_ffi_function(arg: *const c_char) -> *mut ExtractionResult {
///     ffi_panic_guard!("my_ffi_function", {
///         // Function body goes here and
///         // evaluates to the pointer to return.
///     })
/// }
/// ```
///
/// Functions returning `bool` use the sibling macro, which yields `false` on panic:
///
/// ```rust,ignore
/// #[no_mangle]
/// pub extern "C" fn my_bool_function(arg: *const c_char) -> bool {
///     ffi_panic_guard_bool!("my_bool_function", {
///         // Function body goes here and
///         // evaluates to `true` or `false`.
///     })
/// }
/// ```
#[macro_export]
macro_rules! ffi_panic_guard {
    ($function_name:expr, $body:expr) => {{
        std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| $body)).unwrap_or_else(|payload| {
            let panic_ctx =
                kreuzberg::panic_context::PanicContext::new(file!(), line!(), $function_name, payload.as_ref());
            let shielded = $crate::panic_shield::StructuredError::from_panic(panic_ctx);
            $crate::panic_shield::set_structured_error(shielded);
            // Pointer-returning functions report failure with null.
            std::ptr::null_mut()
        })
    }};
}
|
|
143
|
-
|
|
144
|
-
/// Runs a `bool`-returning FFI function body behind a panic barrier.
///
/// Same behaviour as `ffi_panic_guard`, except that a caught panic makes the
/// macro evaluate to `false` after the structured error has been stored.
#[macro_export]
macro_rules! ffi_panic_guard_bool {
    ($function_name:expr, $body:expr) => {{
        std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| $body)).unwrap_or_else(|payload| {
            let panic_ctx =
                kreuzberg::panic_context::PanicContext::new(file!(), line!(), $function_name, payload.as_ref());
            let shielded = $crate::panic_shield::StructuredError::from_panic(panic_ctx);
            $crate::panic_shield::set_structured_error(shielded);
            // `false` is the failure value for bool-returning functions.
            false
        })
    }};
}
|
|
161
|
-
|
|
162
|
-
/// Runs an `i32`-returning FFI function body behind a panic barrier.
///
/// Same behaviour as `ffi_panic_guard`, except that a caught panic makes the
/// macro evaluate to `-1` after the structured error has been stored.
#[macro_export]
macro_rules! ffi_panic_guard_i32 {
    ($function_name:expr, $body:expr) => {{
        std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| $body)).unwrap_or_else(|payload| {
            let panic_ctx =
                kreuzberg::panic_context::PanicContext::new(file!(), line!(), $function_name, payload.as_ref());
            let shielded = $crate::panic_shield::StructuredError::from_panic(panic_ctx);
            $crate::panic_shield::set_structured_error(shielded);
            // `-1` is the failure value for i32-returning functions.
            -1
        })
    }};
}
|
|
179
|
-
|
|
180
|
-
#[cfg(test)]
mod tests {
    use super::*;

    /// An error built from a panic context has the `Panic` code, keeps the
    /// context, and carries the payload text in its message.
    #[test]
    fn test_structured_error_from_panic() {
        let payload = "test panic".to_string();
        let error = StructuredError::from_panic(PanicContext::new("test.rs", 42, "test_fn", &payload));

        assert_eq!(error.code, ErrorCode::Panic);
        assert!(error.panic_context.is_some());
        assert!(error.message.contains("test panic"));
    }

    /// An error built from a message keeps the message and code verbatim and
    /// has no panic context.
    #[test]
    fn test_structured_error_from_message() {
        let error = StructuredError::from_message("test error".to_string(), ErrorCode::GenericError);

        assert_eq!(error.message, "test error");
        assert_eq!(error.code, ErrorCode::GenericError);
        assert!(error.panic_context.is_none());
    }

    /// The thread-local slot round-trips an error and is empty after clearing.
    #[test]
    fn test_error_storage() {
        clear_structured_error();
        assert!(get_last_error_message().is_none());

        set_structured_error(StructuredError::from_message("test".to_string(), ErrorCode::IoError));

        assert_eq!(get_last_error_message(), Some("test".to_string()));
        assert_eq!(get_last_error_code(), ErrorCode::IoError);

        clear_structured_error();
        assert!(get_last_error_message().is_none());
    }

    /// The panic context of a stored panic error can be read back intact.
    #[test]
    fn test_panic_context_extraction() {
        clear_structured_error();

        let payload = "panic message".to_string();
        let original = PanicContext::new("file.rs", 10, "func", &payload);
        set_structured_error(StructuredError::from_panic(original));

        let stored = get_last_panic_context().expect("a panic context should have been stored");
        assert_eq!(stored.file, "file.rs");
        assert_eq!(stored.line, 10);
        assert_eq!(stored.function, "func");
    }

    /// Without a panic, the guard hands back the body's value untouched.
    #[test]
    fn test_ffi_panic_guard_success() {
        let raw = crate::ffi_panic_guard!("test_success", { Box::into_raw(Box::new(42)) });
        assert!(!raw.is_null());
        // SAFETY: `raw` was just produced by `Box::into_raw` and is non-null;
        // it is read once and then reclaimed exactly once.
        unsafe {
            assert_eq!(*raw, 42);
            drop(Box::from_raw(raw));
        }
    }

    /// A panicking body yields null and records a `Panic` error naming both
    /// the payload and the guarded function.
    #[test]
    fn test_ffi_panic_guard_panic() {
        clear_structured_error();

        let raw: *mut i32 = crate::ffi_panic_guard!("test_panic", {
            panic!("intentional panic");
            #[allow(unreachable_code)]
            Box::into_raw(Box::new(42))
        });

        assert!(raw.is_null());
        assert_eq!(get_last_error_code(), ErrorCode::Panic);

        let reported = get_last_error_message().expect("the guard should have stored an error message");
        assert!(reported.contains("intentional panic"));
        assert!(reported.contains("test_panic"));
    }
}
|