kreuzberg 4.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rspec +3 -0
- data/.rubocop.yaml +534 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +157 -0
- data/README.md +421 -0
- data/Rakefile +25 -0
- data/Steepfile +47 -0
- data/examples/async_patterns.rb +340 -0
- data/ext/kreuzberg_rb/extconf.rb +35 -0
- data/ext/kreuzberg_rb/native/Cargo.toml +36 -0
- data/ext/kreuzberg_rb/native/README.md +425 -0
- data/ext/kreuzberg_rb/native/build.rs +17 -0
- data/ext/kreuzberg_rb/native/include/ieeefp.h +11 -0
- data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +14 -0
- data/ext/kreuzberg_rb/native/include/strings.h +20 -0
- data/ext/kreuzberg_rb/native/include/unistd.h +47 -0
- data/ext/kreuzberg_rb/native/src/lib.rs +2939 -0
- data/extconf.rb +28 -0
- data/kreuzberg.gemspec +105 -0
- data/lib/kreuzberg/api_proxy.rb +142 -0
- data/lib/kreuzberg/cache_api.rb +45 -0
- data/lib/kreuzberg/cli.rb +55 -0
- data/lib/kreuzberg/cli_proxy.rb +127 -0
- data/lib/kreuzberg/config.rb +684 -0
- data/lib/kreuzberg/errors.rb +50 -0
- data/lib/kreuzberg/extraction_api.rb +84 -0
- data/lib/kreuzberg/mcp_proxy.rb +186 -0
- data/lib/kreuzberg/ocr_backend_protocol.rb +113 -0
- data/lib/kreuzberg/post_processor_protocol.rb +86 -0
- data/lib/kreuzberg/result.rb +216 -0
- data/lib/kreuzberg/setup_lib_path.rb +79 -0
- data/lib/kreuzberg/validator_protocol.rb +89 -0
- data/lib/kreuzberg/version.rb +5 -0
- data/lib/kreuzberg.rb +82 -0
- data/pkg/kreuzberg-4.0.0.rc1.gem +0 -0
- data/sig/kreuzberg/internal.rbs +184 -0
- data/sig/kreuzberg.rbs +468 -0
- data/spec/binding/cache_spec.rb +227 -0
- data/spec/binding/cli_proxy_spec.rb +87 -0
- data/spec/binding/cli_spec.rb +54 -0
- data/spec/binding/config_spec.rb +345 -0
- data/spec/binding/config_validation_spec.rb +283 -0
- data/spec/binding/error_handling_spec.rb +213 -0
- data/spec/binding/errors_spec.rb +66 -0
- data/spec/binding/plugins/ocr_backend_spec.rb +307 -0
- data/spec/binding/plugins/postprocessor_spec.rb +269 -0
- data/spec/binding/plugins/validator_spec.rb +274 -0
- data/spec/examples.txt +104 -0
- data/spec/fixtures/config.toml +39 -0
- data/spec/fixtures/config.yaml +42 -0
- data/spec/fixtures/invalid_config.toml +4 -0
- data/spec/smoke/package_spec.rb +178 -0
- data/spec/spec_helper.rb +42 -0
- data/vendor/kreuzberg/Cargo.toml +134 -0
- data/vendor/kreuzberg/README.md +175 -0
- data/vendor/kreuzberg/build.rs +460 -0
- data/vendor/kreuzberg/src/api/error.rs +81 -0
- data/vendor/kreuzberg/src/api/handlers.rs +199 -0
- data/vendor/kreuzberg/src/api/mod.rs +79 -0
- data/vendor/kreuzberg/src/api/server.rs +353 -0
- data/vendor/kreuzberg/src/api/types.rs +170 -0
- data/vendor/kreuzberg/src/bin/profile_extract.rs +455 -0
- data/vendor/kreuzberg/src/cache/mod.rs +1143 -0
- data/vendor/kreuzberg/src/chunking/mod.rs +677 -0
- data/vendor/kreuzberg/src/core/batch_mode.rs +35 -0
- data/vendor/kreuzberg/src/core/config.rs +1032 -0
- data/vendor/kreuzberg/src/core/extractor.rs +903 -0
- data/vendor/kreuzberg/src/core/io.rs +327 -0
- data/vendor/kreuzberg/src/core/mime.rs +615 -0
- data/vendor/kreuzberg/src/core/mod.rs +42 -0
- data/vendor/kreuzberg/src/core/pipeline.rs +906 -0
- data/vendor/kreuzberg/src/embeddings.rs +323 -0
- data/vendor/kreuzberg/src/error.rs +431 -0
- data/vendor/kreuzberg/src/extraction/archive.rs +954 -0
- data/vendor/kreuzberg/src/extraction/docx.rs +40 -0
- data/vendor/kreuzberg/src/extraction/email.rs +854 -0
- data/vendor/kreuzberg/src/extraction/excel.rs +688 -0
- data/vendor/kreuzberg/src/extraction/html.rs +553 -0
- data/vendor/kreuzberg/src/extraction/image.rs +368 -0
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +564 -0
- data/vendor/kreuzberg/src/extraction/mod.rs +77 -0
- data/vendor/kreuzberg/src/extraction/office_metadata/app_properties.rs +398 -0
- data/vendor/kreuzberg/src/extraction/office_metadata/core_properties.rs +247 -0
- data/vendor/kreuzberg/src/extraction/office_metadata/custom_properties.rs +240 -0
- data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +128 -0
- data/vendor/kreuzberg/src/extraction/pandoc/batch.rs +275 -0
- data/vendor/kreuzberg/src/extraction/pandoc/mime_types.rs +178 -0
- data/vendor/kreuzberg/src/extraction/pandoc/mod.rs +491 -0
- data/vendor/kreuzberg/src/extraction/pandoc/server.rs +496 -0
- data/vendor/kreuzberg/src/extraction/pandoc/subprocess.rs +1188 -0
- data/vendor/kreuzberg/src/extraction/pandoc/version.rs +162 -0
- data/vendor/kreuzberg/src/extraction/pptx.rs +3000 -0
- data/vendor/kreuzberg/src/extraction/structured.rs +490 -0
- data/vendor/kreuzberg/src/extraction/table.rs +328 -0
- data/vendor/kreuzberg/src/extraction/text.rs +269 -0
- data/vendor/kreuzberg/src/extraction/xml.rs +333 -0
- data/vendor/kreuzberg/src/extractors/archive.rs +425 -0
- data/vendor/kreuzberg/src/extractors/docx.rs +479 -0
- data/vendor/kreuzberg/src/extractors/email.rs +129 -0
- data/vendor/kreuzberg/src/extractors/excel.rs +344 -0
- data/vendor/kreuzberg/src/extractors/html.rs +410 -0
- data/vendor/kreuzberg/src/extractors/image.rs +195 -0
- data/vendor/kreuzberg/src/extractors/mod.rs +268 -0
- data/vendor/kreuzberg/src/extractors/pandoc.rs +201 -0
- data/vendor/kreuzberg/src/extractors/pdf.rs +496 -0
- data/vendor/kreuzberg/src/extractors/pptx.rs +234 -0
- data/vendor/kreuzberg/src/extractors/structured.rs +126 -0
- data/vendor/kreuzberg/src/extractors/text.rs +242 -0
- data/vendor/kreuzberg/src/extractors/xml.rs +128 -0
- data/vendor/kreuzberg/src/image/dpi.rs +164 -0
- data/vendor/kreuzberg/src/image/mod.rs +6 -0
- data/vendor/kreuzberg/src/image/preprocessing.rs +417 -0
- data/vendor/kreuzberg/src/image/resize.rs +89 -0
- data/vendor/kreuzberg/src/keywords/config.rs +154 -0
- data/vendor/kreuzberg/src/keywords/mod.rs +237 -0
- data/vendor/kreuzberg/src/keywords/processor.rs +267 -0
- data/vendor/kreuzberg/src/keywords/rake.rs +294 -0
- data/vendor/kreuzberg/src/keywords/types.rs +68 -0
- data/vendor/kreuzberg/src/keywords/yake.rs +163 -0
- data/vendor/kreuzberg/src/language_detection/mod.rs +942 -0
- data/vendor/kreuzberg/src/lib.rs +102 -0
- data/vendor/kreuzberg/src/mcp/mod.rs +32 -0
- data/vendor/kreuzberg/src/mcp/server.rs +1966 -0
- data/vendor/kreuzberg/src/ocr/cache.rs +469 -0
- data/vendor/kreuzberg/src/ocr/error.rs +37 -0
- data/vendor/kreuzberg/src/ocr/hocr.rs +216 -0
- data/vendor/kreuzberg/src/ocr/mod.rs +58 -0
- data/vendor/kreuzberg/src/ocr/processor.rs +847 -0
- data/vendor/kreuzberg/src/ocr/table/mod.rs +4 -0
- data/vendor/kreuzberg/src/ocr/table/tsv_parser.rs +144 -0
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +450 -0
- data/vendor/kreuzberg/src/ocr/types.rs +393 -0
- data/vendor/kreuzberg/src/ocr/utils.rs +47 -0
- data/vendor/kreuzberg/src/ocr/validation.rs +206 -0
- data/vendor/kreuzberg/src/pdf/error.rs +122 -0
- data/vendor/kreuzberg/src/pdf/images.rs +139 -0
- data/vendor/kreuzberg/src/pdf/metadata.rs +346 -0
- data/vendor/kreuzberg/src/pdf/mod.rs +50 -0
- data/vendor/kreuzberg/src/pdf/rendering.rs +369 -0
- data/vendor/kreuzberg/src/pdf/table.rs +420 -0
- data/vendor/kreuzberg/src/pdf/text.rs +161 -0
- data/vendor/kreuzberg/src/plugins/extractor.rs +1010 -0
- data/vendor/kreuzberg/src/plugins/mod.rs +209 -0
- data/vendor/kreuzberg/src/plugins/ocr.rs +629 -0
- data/vendor/kreuzberg/src/plugins/processor.rs +641 -0
- data/vendor/kreuzberg/src/plugins/registry.rs +1324 -0
- data/vendor/kreuzberg/src/plugins/traits.rs +258 -0
- data/vendor/kreuzberg/src/plugins/validator.rs +955 -0
- data/vendor/kreuzberg/src/stopwords/mod.rs +1470 -0
- data/vendor/kreuzberg/src/text/mod.rs +19 -0
- data/vendor/kreuzberg/src/text/quality.rs +697 -0
- data/vendor/kreuzberg/src/text/string_utils.rs +217 -0
- data/vendor/kreuzberg/src/text/token_reduction/cjk_utils.rs +164 -0
- data/vendor/kreuzberg/src/text/token_reduction/config.rs +100 -0
- data/vendor/kreuzberg/src/text/token_reduction/core.rs +796 -0
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +902 -0
- data/vendor/kreuzberg/src/text/token_reduction/mod.rs +160 -0
- data/vendor/kreuzberg/src/text/token_reduction/semantic.rs +619 -0
- data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +147 -0
- data/vendor/kreuzberg/src/types.rs +873 -0
- data/vendor/kreuzberg/src/utils/mod.rs +17 -0
- data/vendor/kreuzberg/src/utils/quality.rs +959 -0
- data/vendor/kreuzberg/src/utils/string_utils.rs +381 -0
- data/vendor/kreuzberg/stopwords/af_stopwords.json +53 -0
- data/vendor/kreuzberg/stopwords/ar_stopwords.json +482 -0
- data/vendor/kreuzberg/stopwords/bg_stopwords.json +261 -0
- data/vendor/kreuzberg/stopwords/bn_stopwords.json +400 -0
- data/vendor/kreuzberg/stopwords/br_stopwords.json +1205 -0
- data/vendor/kreuzberg/stopwords/ca_stopwords.json +280 -0
- data/vendor/kreuzberg/stopwords/cs_stopwords.json +425 -0
- data/vendor/kreuzberg/stopwords/da_stopwords.json +172 -0
- data/vendor/kreuzberg/stopwords/de_stopwords.json +622 -0
- data/vendor/kreuzberg/stopwords/el_stopwords.json +849 -0
- data/vendor/kreuzberg/stopwords/en_stopwords.json +1300 -0
- data/vendor/kreuzberg/stopwords/eo_stopwords.json +175 -0
- data/vendor/kreuzberg/stopwords/es_stopwords.json +734 -0
- data/vendor/kreuzberg/stopwords/et_stopwords.json +37 -0
- data/vendor/kreuzberg/stopwords/eu_stopwords.json +100 -0
- data/vendor/kreuzberg/stopwords/fa_stopwords.json +801 -0
- data/vendor/kreuzberg/stopwords/fi_stopwords.json +849 -0
- data/vendor/kreuzberg/stopwords/fr_stopwords.json +693 -0
- data/vendor/kreuzberg/stopwords/ga_stopwords.json +111 -0
- data/vendor/kreuzberg/stopwords/gl_stopwords.json +162 -0
- data/vendor/kreuzberg/stopwords/gu_stopwords.json +226 -0
- data/vendor/kreuzberg/stopwords/ha_stopwords.json +41 -0
- data/vendor/kreuzberg/stopwords/he_stopwords.json +196 -0
- data/vendor/kreuzberg/stopwords/hi_stopwords.json +227 -0
- data/vendor/kreuzberg/stopwords/hr_stopwords.json +181 -0
- data/vendor/kreuzberg/stopwords/hu_stopwords.json +791 -0
- data/vendor/kreuzberg/stopwords/hy_stopwords.json +47 -0
- data/vendor/kreuzberg/stopwords/id_stopwords.json +760 -0
- data/vendor/kreuzberg/stopwords/it_stopwords.json +634 -0
- data/vendor/kreuzberg/stopwords/ja_stopwords.json +136 -0
- data/vendor/kreuzberg/stopwords/kn_stopwords.json +84 -0
- data/vendor/kreuzberg/stopwords/ko_stopwords.json +681 -0
- data/vendor/kreuzberg/stopwords/ku_stopwords.json +64 -0
- data/vendor/kreuzberg/stopwords/la_stopwords.json +51 -0
- data/vendor/kreuzberg/stopwords/lt_stopwords.json +476 -0
- data/vendor/kreuzberg/stopwords/lv_stopwords.json +163 -0
- data/vendor/kreuzberg/stopwords/ml_stopwords.json +1 -0
- data/vendor/kreuzberg/stopwords/mr_stopwords.json +101 -0
- data/vendor/kreuzberg/stopwords/ms_stopwords.json +477 -0
- data/vendor/kreuzberg/stopwords/ne_stopwords.json +490 -0
- data/vendor/kreuzberg/stopwords/nl_stopwords.json +415 -0
- data/vendor/kreuzberg/stopwords/no_stopwords.json +223 -0
- data/vendor/kreuzberg/stopwords/pl_stopwords.json +331 -0
- data/vendor/kreuzberg/stopwords/pt_stopwords.json +562 -0
- data/vendor/kreuzberg/stopwords/ro_stopwords.json +436 -0
- data/vendor/kreuzberg/stopwords/ru_stopwords.json +561 -0
- data/vendor/kreuzberg/stopwords/si_stopwords.json +193 -0
- data/vendor/kreuzberg/stopwords/sk_stopwords.json +420 -0
- data/vendor/kreuzberg/stopwords/sl_stopwords.json +448 -0
- data/vendor/kreuzberg/stopwords/so_stopwords.json +32 -0
- data/vendor/kreuzberg/stopwords/st_stopwords.json +33 -0
- data/vendor/kreuzberg/stopwords/sv_stopwords.json +420 -0
- data/vendor/kreuzberg/stopwords/sw_stopwords.json +76 -0
- data/vendor/kreuzberg/stopwords/ta_stopwords.json +129 -0
- data/vendor/kreuzberg/stopwords/te_stopwords.json +54 -0
- data/vendor/kreuzberg/stopwords/th_stopwords.json +118 -0
- data/vendor/kreuzberg/stopwords/tl_stopwords.json +149 -0
- data/vendor/kreuzberg/stopwords/tr_stopwords.json +506 -0
- data/vendor/kreuzberg/stopwords/uk_stopwords.json +75 -0
- data/vendor/kreuzberg/stopwords/ur_stopwords.json +519 -0
- data/vendor/kreuzberg/stopwords/vi_stopwords.json +647 -0
- data/vendor/kreuzberg/stopwords/yo_stopwords.json +62 -0
- data/vendor/kreuzberg/stopwords/zh_stopwords.json +796 -0
- data/vendor/kreuzberg/stopwords/zu_stopwords.json +31 -0
- data/vendor/kreuzberg/tests/api_tests.rs +966 -0
- data/vendor/kreuzberg/tests/archive_integration.rs +543 -0
- data/vendor/kreuzberg/tests/batch_orchestration.rs +542 -0
- data/vendor/kreuzberg/tests/batch_processing.rs +304 -0
- data/vendor/kreuzberg/tests/chunking_offset_demo.rs +92 -0
- data/vendor/kreuzberg/tests/concurrency_stress.rs +509 -0
- data/vendor/kreuzberg/tests/config_features.rs +580 -0
- data/vendor/kreuzberg/tests/config_loading_tests.rs +439 -0
- data/vendor/kreuzberg/tests/core_integration.rs +493 -0
- data/vendor/kreuzberg/tests/csv_integration.rs +424 -0
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +124 -0
- data/vendor/kreuzberg/tests/email_integration.rs +325 -0
- data/vendor/kreuzberg/tests/error_handling.rs +393 -0
- data/vendor/kreuzberg/tests/format_integration.rs +159 -0
- data/vendor/kreuzberg/tests/helpers/mod.rs +142 -0
- data/vendor/kreuzberg/tests/image_integration.rs +253 -0
- data/vendor/kreuzberg/tests/keywords_integration.rs +479 -0
- data/vendor/kreuzberg/tests/keywords_quality.rs +509 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +428 -0
- data/vendor/kreuzberg/tests/ocr_configuration.rs +510 -0
- data/vendor/kreuzberg/tests/ocr_errors.rs +676 -0
- data/vendor/kreuzberg/tests/ocr_quality.rs +627 -0
- data/vendor/kreuzberg/tests/ocr_stress.rs +469 -0
- data/vendor/kreuzberg/tests/pandoc_integration.rs +503 -0
- data/vendor/kreuzberg/tests/pdf_integration.rs +43 -0
- data/vendor/kreuzberg/tests/pipeline_integration.rs +1412 -0
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +771 -0
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +561 -0
- data/vendor/kreuzberg/tests/plugin_system.rs +921 -0
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +783 -0
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +607 -0
- data/vendor/kreuzberg/tests/security_validation.rs +404 -0
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +888 -0
- data/vendor/kreuzberg/tests/test_fastembed.rs +609 -0
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +87 -0
- metadata +471 -0
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
//! Error types for Kreuzberg.
|
|
2
|
+
//!
|
|
3
|
+
//! This module defines all error types used throughout the library. All errors
|
|
4
|
+
//! are variants of `KreuzbergError` and follow Rust error handling best practices:
|
|
5
|
+
//!
|
|
6
|
+
//! - Use `thiserror` for automatic `Error` trait implementation
|
|
7
|
+
//! - Preserve error chains with `#[source]` attributes
|
|
8
|
+
//! - Include context in error messages (file paths, config values, etc.)
|
|
9
|
+
//!
|
|
10
|
+
//! # Error Handling Philosophy
|
|
11
|
+
//!
|
|
12
|
+
//! **System errors MUST always bubble up unchanged:**
|
|
13
|
+
//! - `KreuzbergError::Io` (from `std::io::Error`) - File system errors, permission errors
|
|
14
|
+
//! - These indicate real system problems that users need to know about
|
|
15
|
+
//! - Never wrap or suppress these - they must surface to enable bug reports
|
|
16
|
+
//!
|
|
17
|
+
//! **Application errors are wrapped with context:**
|
|
18
|
+
//! - `Parsing` - Document format errors, corrupt files
|
|
19
|
+
//! - `Validation` - Invalid configuration or parameters
|
|
20
|
+
//! - `Ocr` - OCR processing failures
|
|
21
|
+
//! - `MissingDependency` - Missing optional system dependencies
|
|
22
|
+
//!
|
|
23
|
+
//! # Example
|
|
24
|
+
//!
|
|
25
|
+
//! ```rust
|
|
26
|
+
//! use kreuzberg::{KreuzbergError, Result};
|
|
27
|
+
//!
|
|
28
|
+
//! fn process_file(path: &str) -> Result<String> {
|
|
29
|
+
//! // IO errors bubble up automatically via ?
|
|
30
|
+
//! let content = std::fs::read_to_string(path)?;
|
|
31
|
+
//!
|
|
32
|
+
//! // Application errors include context
|
|
33
|
+
//! if content.is_empty() {
|
|
34
|
+
//! return Err(KreuzbergError::validation(
|
|
35
|
+
//! format!("File is empty: {}", path)
|
|
36
|
+
//! ));
|
|
37
|
+
//! }
|
|
38
|
+
//!
|
|
39
|
+
//! Ok(content)
|
|
40
|
+
//! }
|
|
41
|
+
//! ```
|
|
42
|
+
use thiserror::Error;
|
|
43
|
+
|
|
44
|
+
/// Result type alias using `KreuzbergError`.
|
|
45
|
+
///
|
|
46
|
+
/// This is the standard return type for all fallible operations in Kreuzberg.
|
|
47
|
+
pub type Result<T> = std::result::Result<T, KreuzbergError>;
|
|
48
|
+
|
|
49
|
+
/// Main error type for all Kreuzberg operations.
|
|
50
|
+
///
|
|
51
|
+
/// All errors in Kreuzberg use this enum, which preserves error chains
|
|
52
|
+
/// and provides context for debugging.
|
|
53
|
+
///
|
|
54
|
+
/// # Variants
|
|
55
|
+
///
|
|
56
|
+
/// - `Io` - File system and I/O errors (always bubble up)
|
|
57
|
+
/// - `Parsing` - Document parsing errors (corrupt files, unsupported features)
|
|
58
|
+
/// - `Ocr` - OCR processing errors
|
|
59
|
+
/// - `Validation` - Input validation errors (invalid paths, config, parameters)
|
|
60
|
+
/// - `Cache` - Cache operation errors (non-fatal, can be ignored)
|
|
61
|
+
/// - `ImageProcessing` - Image manipulation errors
|
|
62
|
+
/// - `Serialization` - JSON/MessagePack serialization errors
|
|
63
|
+
/// - `MissingDependency` - Missing optional dependencies (tesseract, pandoc, etc.)
|
|
64
|
+
/// - `Plugin` - Plugin-specific errors
|
|
65
|
+
/// - `LockPoisoned` - Mutex/RwLock poisoning (should not happen in normal operation)
|
|
66
|
+
/// - `UnsupportedFormat` - Unsupported MIME type or file format
|
|
67
|
+
/// - `Other` - Catch-all for uncommon errors
|
|
68
|
+
#[derive(Debug, Error)]
|
|
69
|
+
pub enum KreuzbergError {
|
|
70
|
+
#[error("IO error: {0}")]
|
|
71
|
+
Io(#[from] std::io::Error),
|
|
72
|
+
|
|
73
|
+
#[error("Parsing error: {message}")]
|
|
74
|
+
Parsing {
|
|
75
|
+
message: String,
|
|
76
|
+
#[source]
|
|
77
|
+
source: Option<Box<dyn std::error::Error + Send + Sync>>,
|
|
78
|
+
},
|
|
79
|
+
|
|
80
|
+
#[error("OCR error: {message}")]
|
|
81
|
+
Ocr {
|
|
82
|
+
message: String,
|
|
83
|
+
#[source]
|
|
84
|
+
source: Option<Box<dyn std::error::Error + Send + Sync>>,
|
|
85
|
+
},
|
|
86
|
+
|
|
87
|
+
#[error("Validation error: {message}")]
|
|
88
|
+
Validation {
|
|
89
|
+
message: String,
|
|
90
|
+
#[source]
|
|
91
|
+
source: Option<Box<dyn std::error::Error + Send + Sync>>,
|
|
92
|
+
},
|
|
93
|
+
|
|
94
|
+
#[error("Cache error: {message}")]
|
|
95
|
+
Cache {
|
|
96
|
+
message: String,
|
|
97
|
+
#[source]
|
|
98
|
+
source: Option<Box<dyn std::error::Error + Send + Sync>>,
|
|
99
|
+
},
|
|
100
|
+
|
|
101
|
+
#[error("Image processing error: {message}")]
|
|
102
|
+
ImageProcessing {
|
|
103
|
+
message: String,
|
|
104
|
+
#[source]
|
|
105
|
+
source: Option<Box<dyn std::error::Error + Send + Sync>>,
|
|
106
|
+
},
|
|
107
|
+
|
|
108
|
+
#[error("Serialization error: {message}")]
|
|
109
|
+
Serialization {
|
|
110
|
+
message: String,
|
|
111
|
+
#[source]
|
|
112
|
+
source: Option<Box<dyn std::error::Error + Send + Sync>>,
|
|
113
|
+
},
|
|
114
|
+
|
|
115
|
+
#[error("Missing dependency: {0}")]
|
|
116
|
+
MissingDependency(String),
|
|
117
|
+
|
|
118
|
+
#[error("Plugin error in '{plugin_name}': {message}")]
|
|
119
|
+
Plugin { message: String, plugin_name: String },
|
|
120
|
+
|
|
121
|
+
#[error("Lock poisoned: {0}")]
|
|
122
|
+
LockPoisoned(String),
|
|
123
|
+
|
|
124
|
+
#[error("Unsupported format: {0}")]
|
|
125
|
+
UnsupportedFormat(String),
|
|
126
|
+
|
|
127
|
+
#[error("{0}")]
|
|
128
|
+
Other(String),
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
/// Converts spreadsheet-reader failures into [`KreuzbergError::Parsing`].
///
/// The message is the `calamine` error's display text and the original
/// error is retained as the source.
#[cfg(feature = "excel")]
impl From<calamine::Error> for KreuzbergError {
    fn from(err: calamine::Error) -> Self {
        // Render the message before `err` is moved into the source slot.
        let message = err.to_string();
        Self::parsing_with_source(message, err)
    }
}
|
|
140
|
+
|
|
141
|
+
impl From<serde_json::Error> for KreuzbergError {
|
|
142
|
+
fn from(err: serde_json::Error) -> Self {
|
|
143
|
+
KreuzbergError::Serialization {
|
|
144
|
+
message: err.to_string(),
|
|
145
|
+
source: Some(Box::new(err)),
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
impl From<rmp_serde::encode::Error> for KreuzbergError {
|
|
151
|
+
fn from(err: rmp_serde::encode::Error) -> Self {
|
|
152
|
+
KreuzbergError::Serialization {
|
|
153
|
+
message: err.to_string(),
|
|
154
|
+
source: Some(Box::new(err)),
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
impl From<rmp_serde::decode::Error> for KreuzbergError {
|
|
160
|
+
fn from(err: rmp_serde::decode::Error) -> Self {
|
|
161
|
+
KreuzbergError::Serialization {
|
|
162
|
+
message: err.to_string(),
|
|
163
|
+
source: Some(Box::new(err)),
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
/// Converts PDF subsystem errors into [`KreuzbergError::Parsing`].
///
/// The message is the `PdfError`'s display text and the original error is
/// retained as the source.
#[cfg(feature = "pdf")]
impl From<crate::pdf::error::PdfError> for KreuzbergError {
    fn from(err: crate::pdf::error::PdfError) -> Self {
        // Render the message before `err` is moved into the source slot.
        let message = err.to_string();
        Self::parsing_with_source(message, err)
    }
}
|
|
177
|
+
|
|
178
|
+
macro_rules! error_constructor {
|
|
179
|
+
($name:ident, $variant:ident) => {
|
|
180
|
+
paste::paste! {
|
|
181
|
+
#[doc = "Create a " $variant " error"]
|
|
182
|
+
pub fn $name<S: Into<String>>(message: S) -> Self {
|
|
183
|
+
Self::$variant {
|
|
184
|
+
message: message.into(),
|
|
185
|
+
source: None,
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
#[doc = "Create a " $variant " error with source"]
|
|
190
|
+
pub fn [<$name _with_source>]<S: Into<String>, E: std::error::Error + Send + Sync + 'static>(
|
|
191
|
+
message: S,
|
|
192
|
+
source: E,
|
|
193
|
+
) -> Self {
|
|
194
|
+
Self::$variant {
|
|
195
|
+
message: message.into(),
|
|
196
|
+
source: Some(Box::new(source)),
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
};
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
impl KreuzbergError {
|
|
204
|
+
error_constructor!(parsing, Parsing);
|
|
205
|
+
error_constructor!(ocr, Ocr);
|
|
206
|
+
error_constructor!(validation, Validation);
|
|
207
|
+
error_constructor!(cache, Cache);
|
|
208
|
+
error_constructor!(image_processing, ImageProcessing);
|
|
209
|
+
error_constructor!(serialization, Serialization);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
/// Unit tests for `KreuzbergError`: display formatting, source chaining and
/// the `From` conversions defined in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_io_error_from() {
        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "file not found");
        let krz_err: KreuzbergError = io_err.into();
        assert!(matches!(krz_err, KreuzbergError::Io(_)));
        assert!(krz_err.to_string().contains("IO error"));
    }

    #[test]
    fn test_parsing_error() {
        let err = KreuzbergError::parsing("invalid format");
        assert_eq!(err.to_string(), "Parsing error: invalid format");
    }

    #[test]
    fn test_parsing_error_with_source() {
        let source = std::io::Error::new(std::io::ErrorKind::InvalidData, "bad data");
        let err = KreuzbergError::parsing_with_source("invalid format", source);
        assert_eq!(err.to_string(), "Parsing error: invalid format");
        assert!(std::error::Error::source(&err).is_some());
    }

    #[test]
    fn test_ocr_error() {
        let err = KreuzbergError::ocr("OCR failed");
        assert_eq!(err.to_string(), "OCR error: OCR failed");
    }

    #[test]
    fn test_ocr_error_with_source() {
        let source = std::io::Error::other("tesseract failed");
        let err = KreuzbergError::ocr_with_source("OCR failed", source);
        assert_eq!(err.to_string(), "OCR error: OCR failed");
        assert!(std::error::Error::source(&err).is_some());
    }

    #[test]
    fn test_validation_error() {
        let err = KreuzbergError::validation("invalid input");
        assert_eq!(err.to_string(), "Validation error: invalid input");
    }

    #[test]
    fn test_validation_error_with_source() {
        let source = std::io::Error::new(std::io::ErrorKind::InvalidInput, "bad param");
        let err = KreuzbergError::validation_with_source("invalid input", source);
        assert_eq!(err.to_string(), "Validation error: invalid input");
        assert!(std::error::Error::source(&err).is_some());
    }

    #[test]
    fn test_cache_error() {
        let err = KreuzbergError::cache("cache write failed");
        assert_eq!(err.to_string(), "Cache error: cache write failed");
    }

    #[test]
    fn test_cache_error_with_source() {
        let source = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "cannot write");
        let err = KreuzbergError::cache_with_source("cache write failed", source);
        assert_eq!(err.to_string(), "Cache error: cache write failed");
        assert!(std::error::Error::source(&err).is_some());
    }

    #[test]
    fn test_image_processing_error() {
        let err = KreuzbergError::image_processing("resize failed");
        assert_eq!(err.to_string(), "Image processing error: resize failed");
    }

    #[test]
    fn test_image_processing_error_with_source() {
        let source = std::io::Error::other("image decode failed");
        let err = KreuzbergError::image_processing_with_source("resize failed", source);
        assert_eq!(err.to_string(), "Image processing error: resize failed");
        assert!(std::error::Error::source(&err).is_some());
    }

    #[test]
    fn test_serialization_error() {
        let err = KreuzbergError::serialization("JSON parse error");
        assert_eq!(err.to_string(), "Serialization error: JSON parse error");
    }

    #[test]
    fn test_serialization_error_with_source() {
        let source = std::io::Error::new(std::io::ErrorKind::InvalidData, "bad format");
        let err = KreuzbergError::serialization_with_source("JSON parse error", source);
        assert_eq!(err.to_string(), "Serialization error: JSON parse error");
        assert!(std::error::Error::source(&err).is_some());
    }

    #[test]
    fn test_missing_dependency_error() {
        let err = KreuzbergError::MissingDependency("tesseract not found".to_string());
        assert_eq!(err.to_string(), "Missing dependency: tesseract not found");
    }

    #[test]
    fn test_plugin_error() {
        let err = KreuzbergError::Plugin {
            message: "extraction failed".to_string(),
            plugin_name: "pdf-extractor".to_string(),
        };
        assert_eq!(err.to_string(), "Plugin error in 'pdf-extractor': extraction failed");
    }

    #[test]
    fn test_unsupported_format_error() {
        let err = KreuzbergError::UnsupportedFormat("application/unknown".to_string());
        assert_eq!(err.to_string(), "Unsupported format: application/unknown");
    }

    #[test]
    fn test_other_error() {
        let err = KreuzbergError::Other("unexpected error".to_string());
        assert_eq!(err.to_string(), "unexpected error");
    }

    #[test]
    #[cfg(feature = "excel")]
    fn test_calamine_error_conversion() {
        let cal_err = calamine::Error::Msg("invalid Excel file");
        let krz_err: KreuzbergError = cal_err.into();
        assert!(matches!(krz_err, KreuzbergError::Parsing { .. }));
        assert!(krz_err.to_string().contains("Parsing error"));
    }

    #[test]
    fn test_serde_json_error_conversion() {
        let json_err = serde_json::from_str::<serde_json::Value>("invalid json").unwrap_err();
        let krz_err: KreuzbergError = json_err.into();
        assert!(matches!(krz_err, KreuzbergError::Serialization { .. }));
        assert!(krz_err.to_string().contains("Serialization error"));
    }

    #[test]
    fn test_rmp_encode_error_conversion() {
        // Construct the encoder error directly. Relying on a serialization
        // call to fail is fragile: if it succeeds, an `if let Err(..)` guard
        // is never entered and the test passes without asserting anything.
        let rmp_err = rmp_serde::encode::Error::Syntax("forced encode failure".to_string());
        let krz_err: KreuzbergError = rmp_err.into();
        assert!(matches!(krz_err, KreuzbergError::Serialization { .. }));
        assert!(krz_err.to_string().contains("Serialization error"));
        assert!(std::error::Error::source(&krz_err).is_some());
    }

    #[test]
    fn test_rmp_decode_error_conversion() {
        let invalid_msgpack = vec![0xFF, 0xFF, 0xFF];
        let rmp_err = rmp_serde::from_slice::<String>(&invalid_msgpack).unwrap_err();
        let krz_err: KreuzbergError = rmp_err.into();
        assert!(matches!(krz_err, KreuzbergError::Serialization { .. }));
    }

    #[test]
    #[cfg(feature = "pdf")]
    fn test_pdf_error_conversion() {
        let pdf_err = crate::pdf::error::PdfError::InvalidPdf("corrupt PDF".to_string());
        let krz_err: KreuzbergError = pdf_err.into();
        assert!(matches!(krz_err, KreuzbergError::Parsing { .. }));
    }

    #[test]
    fn test_error_debug() {
        let err = KreuzbergError::validation("test");
        let debug_str = format!("{:?}", err);
        assert!(debug_str.contains("Validation"));
    }

    #[test]
    fn test_lock_poisoned_error() {
        let err = KreuzbergError::LockPoisoned("Registry lock poisoned".to_string());
        assert_eq!(err.to_string(), "Lock poisoned: Registry lock poisoned");
    }

    #[test]
    fn test_io_error_bubbles_unchanged() {
        // `?` must convert the std I/O error into the `Io` variant, not wrap
        // it in an application-level variant.
        fn read_file() -> Result<String> {
            let content = std::fs::read_to_string("/nonexistent/file.txt")?;
            Ok(content)
        }

        let result = read_file();
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), KreuzbergError::Io(_)));
    }

    #[test]
    fn test_io_error_not_found() {
        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "file not found");
        let krz_err: KreuzbergError = io_err.into();
        assert!(matches!(krz_err, KreuzbergError::Io(_)));
        assert!(krz_err.to_string().contains("file not found"));
    }

    #[test]
    fn test_io_error_permission_denied() {
        let io_err = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "permission denied");
        let krz_err: KreuzbergError = io_err.into();
        assert!(matches!(krz_err, KreuzbergError::Io(_)));
        assert!(krz_err.to_string().contains("permission denied"));
    }

    #[test]
    fn test_io_error_invalid_data_vs_parsing() {
        // An I/O error of kind `InvalidData` stays an `Io` error; only
        // explicitly constructed parsing errors become `Parsing`.
        let io_err = std::io::Error::new(std::io::ErrorKind::InvalidData, "corrupted data");
        let krz_err: KreuzbergError = io_err.into();
        assert!(matches!(krz_err, KreuzbergError::Io(_)));

        let parse_err = KreuzbergError::parsing("corrupted format");
        assert!(matches!(parse_err, KreuzbergError::Parsing { .. }));
    }
}
|