aiecs 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (45) hide show
  1. aiecs/__init__.py +1 -1
  2. aiecs/aiecs_client.py +159 -1
  3. aiecs/config/config.py +4 -0
  4. aiecs/domain/context/__init__.py +24 -0
  5. aiecs/main.py +20 -2
  6. aiecs/scripts/dependance_check/__init__.py +18 -0
  7. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +50 -8
  8. aiecs/scripts/dependance_patch/__init__.py +8 -0
  9. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +12 -0
  10. aiecs/scripts/tools_develop/README.md +340 -0
  11. aiecs/scripts/tools_develop/__init__.py +16 -0
  12. aiecs/scripts/tools_develop/check_type_annotations.py +263 -0
  13. aiecs/scripts/tools_develop/validate_tool_schemas.py +346 -0
  14. aiecs/tools/__init__.py +33 -14
  15. aiecs/tools/docs/__init__.py +103 -0
  16. aiecs/tools/docs/ai_document_orchestrator.py +543 -0
  17. aiecs/tools/docs/ai_document_writer_orchestrator.py +2199 -0
  18. aiecs/tools/docs/content_insertion_tool.py +1214 -0
  19. aiecs/tools/docs/document_creator_tool.py +1161 -0
  20. aiecs/tools/docs/document_layout_tool.py +1090 -0
  21. aiecs/tools/docs/document_parser_tool.py +904 -0
  22. aiecs/tools/docs/document_writer_tool.py +1583 -0
  23. aiecs/tools/langchain_adapter.py +102 -51
  24. aiecs/tools/schema_generator.py +265 -0
  25. aiecs/tools/task_tools/image_tool.py +1 -1
  26. aiecs/tools/task_tools/office_tool.py +9 -0
  27. aiecs/tools/task_tools/scraper_tool.py +1 -1
  28. {aiecs-1.0.8.dist-info → aiecs-1.1.0.dist-info}/METADATA +1 -1
  29. {aiecs-1.0.8.dist-info → aiecs-1.1.0.dist-info}/RECORD +44 -28
  30. aiecs-1.1.0.dist-info/entry_points.txt +9 -0
  31. aiecs-1.0.8.dist-info/entry_points.txt +0 -7
  32. /aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +0 -0
  33. /aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +0 -0
  34. /aiecs/scripts/{dependency_checker.py → dependance_check/dependency_checker.py} +0 -0
  35. /aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +0 -0
  36. /aiecs/scripts/{quick_dependency_check.py → dependance_check/quick_dependency_check.py} +0 -0
  37. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  38. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  39. /aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +0 -0
  40. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  41. /aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +0 -0
  42. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  43. {aiecs-1.0.8.dist-info → aiecs-1.1.0.dist-info}/WHEEL +0 -0
  44. {aiecs-1.0.8.dist-info → aiecs-1.1.0.dist-info}/licenses/LICENSE +0 -0
  45. {aiecs-1.0.8.dist-info → aiecs-1.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,543 @@
1
+ import os
2
+ import asyncio
3
+ import logging
4
+ from typing import Dict, Any, List, Optional, Union, Callable
5
+ from enum import Enum
6
+ from datetime import datetime
7
+
8
+ from pydantic import BaseModel, Field, ValidationError, ConfigDict
9
+ from pydantic_settings import BaseSettings
10
+
11
+ from aiecs.tools.base_tool import BaseTool
12
+ from aiecs.tools import register_tool
13
+
14
+
15
class ProcessingMode(str, Enum):
    """Kinds of AI processing that can be requested for a document.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    # Produce a summary of the document.
    SUMMARIZE = "summarize"
    # Pull specific pieces of information out of the document.
    EXTRACT_INFO = "extract_info"
    # Provide an analysis of the document content.
    ANALYZE = "analyze"
    # Translate the document into another language.
    TRANSLATE = "translate"
    # Assign the document to categories.
    CLASSIFY = "classify"
    # Answer questions using the document content.
    ANSWER_QUESTIONS = "answer_questions"
    # Follow caller-supplied instructions.
    CUSTOM = "custom"
24
+
25
+
26
class AIProvider(str, Enum):
    """Identifiers of the AI providers a caller may select.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    OPENAI = "openai"
    VERTEX_AI = "vertex_ai"
    XAI = "xai"
    LOCAL = "local"
32
+
33
+
34
class OrchestratorSettings(BaseSettings):
    """Tunable defaults for the AI Document Orchestrator.

    Values may be passed as keyword arguments; ``env_prefix`` below names the
    environment-variable prefix configured for these settings.
    """

    # NOTE(review): pydantic-settings documents SettingsConfigDict for
    # BaseSettings subclasses; ConfigDict is kept so behavior is unchanged.
    model_config = ConfigDict(env_prefix="AI_DOC_ORCHESTRATOR_")

    # Provider used whenever a call does not name one explicitly.
    default_ai_provider: AIProvider = AIProvider.OPENAI
    # Upper bound, in characters, on the text handed to the AI per request.
    max_chunk_size: int = 4000
    # Default concurrency limit for batch processing.
    max_concurrent_requests: int = 5
    # The three values below are not read anywhere in this file; presumably
    # they are meant to be forwarded to the AI provider — TODO confirm.
    default_temperature: float = 0.1
    max_tokens: int = 2000
    timeout: int = 60
44
+
45
+
46
class AIDocumentOrchestratorError(Exception):
    """Root of the exception hierarchy raised by the AI Document Orchestrator."""
49
+
50
+
51
class AIProviderError(AIDocumentOrchestratorError):
    """Signals that a call to an AI provider did not succeed."""
54
+
55
+
56
class ProcessingError(AIDocumentOrchestratorError):
    """Signals that processing of a document did not succeed."""
59
+
60
+
61
+ @register_tool("ai_document_orchestrator")
62
+ class AIDocumentOrchestrator(BaseTool):
63
+ """
64
+ AI-powered document processing orchestrator that:
65
+ 1. Coordinates document parsing with AI analysis
66
+ 2. Manages AI provider interactions
67
+ 3. Handles complex document processing workflows
68
+ 4. Provides intelligent content analysis and extraction
69
+
70
+ Integrates with:
71
+ - DocumentParserTool for document parsing
72
+ - Various AI providers for content analysis
73
+ - Existing AIECS infrastructure
74
+ """
75
+
76
def __init__(self, config: Optional[Dict] = None):
    """Set up settings, logger, document parser, AI providers and templates.

    Args:
        config: Optional mapping of setting names to values. When given (and
            non-empty) it is unpacked into ``OrchestratorSettings``;
            otherwise the settings defaults are used.

    Raises:
        ValueError: If ``config`` fails settings validation. The original
            pydantic ``ValidationError`` is attached as ``__cause__``.
    """
    super().__init__(config)

    if config:
        try:
            # BaseSettings takes its values as keyword arguments.
            self.settings = OrchestratorSettings(**config)
        except ValidationError as e:
            # Chain the cause so per-field details survive in the traceback.
            raise ValueError(f"Invalid settings: {e}") from e
    else:
        self.settings = OrchestratorSettings()

    # The logger must exist before the _init_* helpers run: they log when an
    # optional dependency cannot be imported.
    self.logger = logging.getLogger(__name__)

    self._init_document_parser()
    self._init_ai_providers()
    self._init_processing_templates()
99
+
100
def _init_document_parser(self):
    """Create the DocumentParserTool, or store ``None`` if it cannot be imported."""
    parser = None
    try:
        # Imported lazily so a missing parser module does not break this one.
        from aiecs.tools.docs.document_parser_tool import DocumentParserTool
        parser = DocumentParserTool()
    except ImportError:
        self.logger.error("DocumentParserTool not available")
    self.document_parser = parser
108
+
109
def _init_ai_providers(self):
    """Build the provider registry and try to attach an AIECS client.

    ``self.ai_providers`` always starts empty. When ``aiecs.AIECS`` can be
    imported and instantiated it is stored under the ``"aiecs"`` key and as
    ``self.aiecs_client``; on ImportError a warning is logged and
    ``self.aiecs_client`` is ``None``.
    """
    self.ai_providers = {}

    client = None
    try:
        # Imported lazily; the client is optional for this tool.
        from aiecs import AIECS
        client = AIECS()
    except ImportError:
        self.logger.warning("AIECS client not available")
    else:
        self.ai_providers["aiecs"] = client

    self.aiecs_client = client
121
+
122
def _init_processing_templates(self):
    """Populate ``self.processing_templates`` with one prompt pair per mode.

    Each entry maps a ``ProcessingMode`` to a dict with a ``system_prompt``
    and a ``user_prompt_template``. The user template contains ``{content}``
    plus mode-specific placeholders such as ``{target_language}``.
    """

    def entry(system_prompt, user_prompt_template):
        # Keeps the two-key shape of every template in one place.
        return {
            "system_prompt": system_prompt,
            "user_prompt_template": user_prompt_template,
        }

    self.processing_templates = {
        ProcessingMode.SUMMARIZE: entry(
            "You are an expert document summarizer. Create concise, informative summaries.",
            "Please summarize the following document content:\n\n{content}\n\nProvide a clear, structured summary highlighting the key points.",
        ),
        ProcessingMode.EXTRACT_INFO: entry(
            "You are an expert information extractor. Extract specific information from documents.",
            "Extract the following information from the document:\n{extraction_criteria}\n\nDocument content:\n{content}\n\nProvide the extracted information in a structured format.",
        ),
        ProcessingMode.ANALYZE: entry(
            "You are an expert document analyzer. Provide thorough analysis of document content.",
            "Analyze the following document content and provide insights:\n\n{content}\n\nInclude analysis of:\n- Main themes and topics\n- Key findings\n- Important details\n- Overall structure and organization",
        ),
        ProcessingMode.TRANSLATE: entry(
            "You are an expert translator. Provide accurate translations while preserving meaning and context.",
            "Translate the following document content to {target_language}:\n\n{content}\n\nMaintain the original structure and formatting where possible.",
        ),
        ProcessingMode.CLASSIFY: entry(
            "You are an expert document classifier. Classify documents accurately based on their content.",
            "Classify the following document content into the appropriate categories:\n\nCategories: {categories}\n\nDocument content:\n{content}\n\nProvide the classification with confidence scores and reasoning.",
        ),
        ProcessingMode.ANSWER_QUESTIONS: entry(
            "You are an expert document analyst. Answer questions based on document content accurately.",
            "Based on the following document content, answer these questions:\n\nQuestions:\n{questions}\n\nDocument content:\n{content}\n\nProvide clear, accurate answers with references to the relevant parts of the document.",
        ),
        ProcessingMode.CUSTOM: entry(
            "You are an expert document analyst. Follow the custom instructions provided.",
            "{custom_prompt}\n\nDocument content:\n{content}\n\nPlease provide your analysis based on the custom instructions above.",
        ),
    }
154
+
155
+ # Schema definitions
156
class ProcessDocumentSchema(BaseModel):
    """Input schema mirroring the parameters of ``process_document``."""

    source: str = Field(
        description="URL or file path to the document",
    )
    processing_mode: ProcessingMode = Field(
        description="AI processing mode to apply",
    )
    ai_provider: Optional[AIProvider] = Field(
        default=None,
        description="AI provider to use",
    )
    processing_params: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Additional processing parameters",
    )
    parse_params: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Document parsing parameters",
    )
    ai_params: Optional[Dict[str, Any]] = Field(
        default=None,
        description="AI provider parameters",
    )
164
+
165
class BatchProcessSchema(BaseModel):
    """Input schema mirroring the parameters of ``batch_process_documents``."""

    sources: List[str] = Field(
        description="List of URLs or file paths",
    )
    processing_mode: ProcessingMode = Field(
        description="AI processing mode to apply",
    )
    ai_provider: Optional[AIProvider] = Field(
        default=None,
        description="AI provider to use",
    )
    processing_params: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Additional processing parameters",
    )
    max_concurrent: Optional[int] = Field(
        default=None,
        description="Maximum concurrent processing",
    )
172
+
173
class AnalyzeDocumentSchema(BaseModel):
    """Input schema mirroring the parameters of ``analyze_document``."""

    source: str = Field(
        description="URL or file path to the document",
    )
    analysis_type: str = Field(
        description="Type of analysis to perform",
    )
    custom_prompt: Optional[str] = Field(
        default=None,
        description="Custom AI prompt for analysis",
    )
    ai_provider: Optional[AIProvider] = Field(
        default=None,
        description="AI provider to use",
    )
179
+
180
def process_document(self,
                     source: str,
                     processing_mode: ProcessingMode,
                     ai_provider: Optional[AIProvider] = None,
                     processing_params: Optional[Dict[str, Any]] = None,
                     parse_params: Optional[Dict[str, Any]] = None,
                     ai_params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Process a document using AI with intelligent orchestration

    The document is parsed, its content is prepared for the AI, the AI is
    invoked for the requested mode, and the outcome is assembled into a
    result dict that is finally passed through ``_post_process_result``.

    Args:
        source: URL or file path to document
        processing_mode: AI processing mode to apply
        ai_provider: AI provider to use; falls back to
            ``settings.default_ai_provider`` when omitted
        processing_params: Additional processing parameters
        parse_params: Document parsing parameters
        ai_params: AI provider parameters

    Returns:
        Dict with ``source``, ``processing_mode``, ``ai_provider``,
        ``document_info``, ``ai_result`` and ``processing_metadata``

    Raises:
        ProcessingError: If any step fails; the underlying exception is
            attached as ``__cause__``.
    """
    try:
        start_time = datetime.now()

        # Resolve optional arguments once so every step sees the same values.
        provider = ai_provider or self.settings.default_ai_provider
        processing_params = processing_params or {}

        # Step 1: Parse the document
        self.logger.info(f"Starting document processing: {source}")
        parsed_result = self._parse_document(source, parse_params or {})

        # Step 2: Prepare content for AI processing
        content = self._prepare_content_for_ai(parsed_result, processing_mode)

        # Step 3: Process with AI
        ai_result = self._process_with_ai(
            content,
            processing_mode,
            provider,
            processing_params,
            ai_params or {},
        )

        # Step 4: Combine results. The clock is read once so that end_time
        # and processing_duration describe the same instant.
        end_time = datetime.now()
        result = {
            "source": source,
            "processing_mode": processing_mode,
            "ai_provider": provider,
            "document_info": {
                "type": parsed_result.get("document_type"),
                "detection_confidence": parsed_result.get("detection_confidence"),
                "content_stats": parsed_result.get("content_stats"),
            },
            "ai_result": ai_result,
            "processing_metadata": {
                "start_time": start_time.isoformat(),
                "end_time": end_time.isoformat(),
                "processing_duration": (end_time - start_time).total_seconds(),
            },
        }

        # Step 5: Post-process if needed
        return self._post_process_result(result, processing_mode, processing_params)

    except Exception as e:
        # Keep the original exception reachable for debugging.
        raise ProcessingError(f"Document processing failed: {str(e)}") from e
245
+
246
async def process_document_async(self,
                                 source: str,
                                 processing_mode: ProcessingMode,
                                 ai_provider: Optional[AIProvider] = None,
                                 processing_params: Optional[Dict[str, Any]] = None,
                                 parse_params: Optional[Dict[str, Any]] = None,
                                 ai_params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Awaitable wrapper that runs ``process_document`` in a worker thread.

    Takes the same arguments as ``process_document`` and returns its result.
    """
    call_kwargs = {
        "source": source,
        "processing_mode": processing_mode,
        "ai_provider": ai_provider,
        "processing_params": processing_params,
        "parse_params": parse_params,
        "ai_params": ai_params,
    }
    # The synchronous implementation is off-loaded so the event loop stays free.
    return await asyncio.to_thread(self.process_document, **call_kwargs)
263
+
264
def batch_process_documents(self,
                            sources: List[str],
                            processing_mode: ProcessingMode,
                            ai_provider: Optional[AIProvider] = None,
                            processing_params: Optional[Dict[str, Any]] = None,
                            max_concurrent: Optional[int] = None) -> Dict[str, Any]:
    """
    Process multiple documents in batch with intelligent orchestration

    Args:
        sources: List of URLs or file paths
        processing_mode: AI processing mode to apply
        ai_provider: AI provider to use
        processing_params: Additional processing parameters
        max_concurrent: Maximum concurrent processing; falls back to
            ``settings.max_concurrent_requests`` when omitted or 0

    Returns:
        Dict containing the per-document results, success/failure counts
        and batch timing metadata

    Raises:
        ProcessingError: If the batch cannot be run; the underlying
            exception is attached as ``__cause__``.
    """
    try:
        start_time = datetime.now()
        max_concurrent = max_concurrent or self.settings.max_concurrent_requests

        batch = self._batch_process_async(
            sources, processing_mode, ai_provider, processing_params, max_concurrent
        )

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: asyncio.run() is safe to use directly.
            results = asyncio.run(batch)
        else:
            # asyncio.run() refuses to start inside a running loop, so the
            # batch gets its own loop on a helper thread. This call still
            # blocks until the batch is finished.
            from concurrent.futures import ThreadPoolExecutor

            with ThreadPoolExecutor(max_workers=1) as pool:
                results = pool.submit(asyncio.run, batch).result()

        # Read the clock once so end_time and total_duration agree.
        end_time = datetime.now()
        succeeded = sum(1 for r in results if r.get("status") == "success")
        failed = sum(1 for r in results if r.get("status") == "error")

        return {
            "sources": sources,
            "processing_mode": processing_mode,
            "ai_provider": ai_provider or self.settings.default_ai_provider,
            "total_documents": len(sources),
            "successful_documents": succeeded,
            "failed_documents": failed,
            "results": results,
            "batch_metadata": {
                "start_time": start_time.isoformat(),
                "end_time": end_time.isoformat(),
                "total_duration": (end_time - start_time).total_seconds(),
            },
        }

    except Exception as e:
        raise ProcessingError(f"Batch processing failed: {str(e)}") from e
312
+
313
def analyze_document(self,
                     source: str,
                     analysis_type: str,
                     custom_prompt: Optional[str] = None,
                     ai_provider: Optional[AIProvider] = None) -> Dict[str, Any]:
    """
    Perform AI-first document analysis

    Args:
        source: URL or file path to document
        analysis_type: Type of analysis to perform
        custom_prompt: Custom AI prompt for analysis. ``{content}`` and
            ``{analysis_type}`` are substituted. A prompt that is not a
            valid ``str.format`` template (for example one containing a
            literal JSON snippet) is still accepted: only those two
            placeholders are replaced and everything else is kept verbatim.
        ai_provider: AI provider to use

    Returns:
        Dict containing analysis results

    Raises:
        ProcessingError: If parsing or the AI call fails; the underlying
            exception is attached as ``__cause__``.
    """
    try:
        # Parse document first
        parsed_result = self._parse_document(source, {})
        content = parsed_result.get("content", "")

        # Prepare AI prompt
        if custom_prompt:
            try:
                prompt = custom_prompt.format(content=content, analysis_type=analysis_type)
            except (KeyError, IndexError, ValueError):
                # Stray braces make str.format() fail. Fall back to literal
                # replacement; the document text goes in last so its own
                # contents are never scanned for placeholders.
                prompt = (
                    custom_prompt
                    .replace("{analysis_type}", str(analysis_type))
                    .replace("{content}", str(content))
                )
        else:
            prompt = f"Perform {analysis_type} analysis on the following document:\n\n{content}"

        # Process with AI
        ai_result = self._call_ai_provider(
            prompt,
            ai_provider or self.settings.default_ai_provider,
            {},
        )

        return {
            "source": source,
            "analysis_type": analysis_type,
            "document_info": {
                "type": parsed_result.get("document_type"),
                "content_stats": parsed_result.get("content_stats"),
            },
            "analysis_result": ai_result,
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        raise ProcessingError(f"Document analysis failed: {str(e)}") from e
361
+
362
def _parse_document(self, source: str, parse_params: Dict[str, Any]) -> Dict[str, Any]:
    """Parse ``source`` with the configured document parser.

    Args:
        source: URL or file path to the document.
        parse_params: Keyword arguments forwarded to the parser's
            ``parse_document`` method.

    Returns:
        Whatever the parser's ``parse_document`` returns.

    Raises:
        ProcessingError: If no parser is available or parsing fails; in the
            latter case the parser's exception is attached as ``__cause__``.
    """
    if not self.document_parser:
        raise ProcessingError("DocumentParserTool not available")

    try:
        return self.document_parser.parse_document(source, **parse_params)
    except Exception as e:
        raise ProcessingError(f"Document parsing failed: {str(e)}") from e
371
+
372
def _prepare_content_for_ai(self, parsed_result: Dict[str, Any], processing_mode: ProcessingMode) -> str:
    """Turn parsed document content into a bounded text string for the AI.

    Args:
        parsed_result: Parser output; its ``"content"`` entry is used. A
            dict value contributes its ``"text"`` entry when present,
            otherwise its string form.
        processing_mode: Requested processing mode (currently unused here).

    Returns:
        The text, cut to ``settings.max_chunk_size`` characters with a
        truncation marker appended when it was longer. Missing or ``None``
        content yields an empty string.
    """
    content = parsed_result.get("content")

    if isinstance(content, dict):
        # Extract text from structured content
        text_content = content["text"] if "text" in content else str(content)
    else:
        text_content = content

    # A missing value must not reach the AI as the literal text "None";
    # anything else is coerced to text so len() and slicing below are safe.
    text_content = "" if text_content is None else str(text_content)

    # Chunk content if too large
    max_size = self.settings.max_chunk_size
    if len(text_content) > max_size:
        # For now, truncate - could implement smart chunking
        text_content = text_content[:max_size] + "\n\n[Content truncated...]"

    return text_content
389
+
390
def _process_with_ai(self,
                     content: str,
                     processing_mode: ProcessingMode,
                     ai_provider: AIProvider,
                     processing_params: Dict[str, Any],
                     ai_params: Dict[str, Any]) -> Dict[str, Any]:
    """Build the prompt for ``processing_mode`` and send it to the AI.

    Args:
        content: Prepared document text.
        processing_mode: Key used to look up the prompt template.
        ai_provider: Provider passed through to ``_call_ai_provider``.
        processing_params: Values for the template's extra placeholders.
        ai_params: Provider parameters passed through unchanged.

    Returns:
        Dict with ``processing_mode``, ``prompt_used``, ``ai_response`` and
        ``ai_provider``.

    Raises:
        AIProviderError: On any failure, including a missing template; the
            underlying exception is attached as ``__cause__``.
    """
    try:
        # Get processing template
        template = self.processing_templates.get(processing_mode)
        if not template:
            raise ProcessingError(f"No template found for processing mode: {processing_mode}")

        # NOTE(review): only the user prompt template is used below; the
        # template's "system_prompt" is not read anywhere in this file —
        # confirm whether it should be forwarded to the provider.
        prompt = self._format_prompt(template, content, processing_params)

        # Call AI provider
        ai_result = self._call_ai_provider(prompt, ai_provider, ai_params)

        return {
            "processing_mode": processing_mode,
            "prompt_used": prompt,
            "ai_response": ai_result,
            "ai_provider": ai_provider,
        }

    except Exception as e:
        raise AIProviderError(f"AI processing failed: {str(e)}") from e
418
+
419
def _format_prompt(self, template: Dict[str, str], content: str, params: Dict[str, Any]) -> str:
    """Fill the user prompt template with the document text and parameters.

    Placeholders have the form ``{name}``. ``{content}`` is replaced with
    ``content``; every other placeholder whose name is a key of ``params``
    is replaced with ``str(value)``. Unknown placeholders are left as they
    are.

    The template is substituted in a single pass, so inserted text (notably
    the document itself) is never rescanned. A document that happens to
    contain ``{target_language}`` therefore reaches the AI unmodified.

    Args:
        template: Mapping containing a ``"user_prompt_template"`` entry.
        content: Document text inserted for ``{content}``.
        params: Values for the remaining placeholders.

    Returns:
        The formatted prompt.
    """
    import re  # local import: used only by this method

    values = {str(key): str(value) for key, value in params.items()}
    # The explicit argument always wins over a "content" key in params.
    values["content"] = content

    def substitute(match):
        # Leave placeholders without a supplied value exactly as written.
        return values.get(match.group(1), match.group(0))

    return re.sub(r"\{([^{}]*)\}", substitute, template["user_prompt_template"])
433
+
434
def _call_ai_provider(self, prompt: str, ai_provider: AIProvider, ai_params: Dict[str, Any]) -> str:
    """Send ``prompt`` to the AI and return the response text.

    With an AIECS client attached, the prompt is wrapped in a
    ``TaskContext`` and handed to the client; otherwise the call goes to
    ``_direct_ai_call``.

    Args:
        prompt: Fully formatted prompt.
        ai_provider: Provider identifier; only used on the fallback path.
        ai_params: Provider parameters; sent as task metadata when the AIECS
            client is used.

    Returns:
        The response text.

    Raises:
        AIProviderError: If the call fails; the underlying exception is
            attached as ``__cause__``.
    """
    try:
        if not self.aiecs_client:
            # Fallback to direct AI provider calls
            return self._direct_ai_call(prompt, ai_provider, ai_params)

        # Use AIECS client for AI operations
        from aiecs.domain.task.task_context import TaskContext

        task_context = TaskContext(
            task_id=f"doc_processing_{datetime.now().timestamp()}",
            task_type="document_processing",
            input_data={"prompt": prompt},
            metadata=ai_params
        )

        # This would need to be adapted based on actual AIECS API
        # NOTE(review): assumes process_task is synchronous and returns a
        # mapping with a "response" key — confirm against the client.
        result = self.aiecs_client.process_task(task_context)
        return result.get("response", "")

    except Exception as e:
        raise AIProviderError(f"AI provider call failed: {str(e)}") from e
457
+
458
def _direct_ai_call(self, prompt: str, ai_provider: AIProvider, ai_params: Dict[str, Any]) -> str:
    """Stand-in for a direct provider call; returns a mock response.

    No provider is contacted. A warning is logged and the reply simply
    echoes the first 100 characters of ``prompt``. ``ai_provider`` and
    ``ai_params`` are accepted but not used.
    """
    self.logger.warning("Using mock AI response - implement actual AI provider integration")
    preview = prompt[:100]
    return f"Mock AI response for prompt: {preview}..."
464
+
465
async def _batch_process_async(self,
                               sources: List[str],
                               processing_mode: ProcessingMode,
                               ai_provider: Optional[AIProvider],
                               processing_params: Optional[Dict[str, Any]],
                               max_concurrent: int) -> List[Dict[str, Any]]:
    """Run ``process_document_async`` for every source with bounded concurrency.

    Failures are captured per document instead of aborting the batch.

    Returns:
        One dict per source, in input order: ``{"source", "status":
        "success", "result"}`` or ``{"source", "status": "error", "error"}``.
    """
    limiter = asyncio.Semaphore(max_concurrent)

    async def run_one(doc_source: str) -> Dict[str, Any]:
        # The semaphore caps how many documents are in flight at once.
        async with limiter:
            try:
                outcome = await self.process_document_async(
                    source=doc_source,
                    processing_mode=processing_mode,
                    ai_provider=ai_provider,
                    processing_params=processing_params,
                )
            except Exception as exc:
                return {"source": doc_source, "status": "error", "error": str(exc)}
            return {"source": doc_source, "status": "success", "result": outcome}

    pending = [run_one(doc_source) for doc_source in sources]
    return await asyncio.gather(*pending)
489
+
490
def _post_process_result(self, result: Dict[str, Any], processing_mode: ProcessingMode, params: Dict[str, Any]) -> Dict[str, Any]:
    """Attach mode-specific extras to ``result`` and return it.

    ``EXTRACT_INFO`` results gain a ``"validation"`` entry and ``CLASSIFY``
    results a ``"confidence_analysis"`` entry; every other mode is returned
    untouched. ``result`` is modified in place.
    """
    if processing_mode == ProcessingMode.EXTRACT_INFO:
        # Validate extracted information
        result["validation"] = self._validate_extracted_info(result, params)
        return result

    if processing_mode == ProcessingMode.CLASSIFY:
        # Add confidence scoring
        result["confidence_analysis"] = self._analyze_classification_confidence(result)

    return result
503
+
504
def _validate_extracted_info(self, result: Dict[str, Any], params: Dict[str, Any]) -> Dict[str, str]:
    """Placeholder validator: returns a fixed report without inspecting its inputs."""
    report = {"status": "validated", "notes": "Validation completed"}
    return report
508
+
509
def _analyze_classification_confidence(self, result: Dict[str, Any]) -> Dict[str, Any]:
    """Placeholder confidence analysis: returns fixed values regardless of ``result``."""
    analysis = {
        "overall_confidence": 0.85,
        "factors": ["content_quality", "model_certainty"],
    }
    return analysis
513
+
514
+ # Utility methods for custom processing
515
def create_custom_processor(self,
                            system_prompt: str,
                            user_prompt_template: str) -> Callable:
    """Create a custom processing function

    Args:
        system_prompt: System prompt stored in the custom template.
        user_prompt_template: User prompt template; ``{content}`` and any
            keyword passed to the returned function are substituted.

    Returns:
        A callable ``custom_processor(source, **kwargs)`` that runs
        ``process_document`` in ``CUSTOM`` mode with ``kwargs`` as the
        processing parameters.
    """
    custom_template = {
        "system_prompt": system_prompt,
        "user_prompt_template": user_prompt_template,
    }
    missing = object()  # distinguishes "no previous template" from None

    def custom_processor(source: str, **kwargs) -> Dict[str, Any]:
        # The CUSTOM slot is shared by the whole orchestrator, so it is only
        # borrowed for this call and put back afterwards. Otherwise one
        # processor would permanently replace the built-in CUSTOM template
        # (and any other processor's). Not safe for concurrent callers.
        previous = self.processing_templates.get(ProcessingMode.CUSTOM, missing)
        self.processing_templates[ProcessingMode.CUSTOM] = dict(custom_template)
        try:
            return self.process_document(
                source=source,
                processing_mode=ProcessingMode.CUSTOM,
                processing_params=kwargs,
            )
        finally:
            if previous is missing:
                self.processing_templates.pop(ProcessingMode.CUSTOM, None)
            else:
                self.processing_templates[ProcessingMode.CUSTOM] = previous

    return custom_processor
533
+
534
def get_processing_stats(self) -> Dict[str, Any]:
    """Return processing statistics.

    Statistics tracking is not implemented; the values below are fixed
    placeholders.
    """
    return dict(
        total_documents_processed=0,
        average_processing_time=0,
        success_rate=1.0,
        most_common_document_types=[],
        ai_provider_usage={},
    )