aiecs 1.0.8__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (81)
  1. aiecs/__init__.py +1 -1
  2. aiecs/aiecs_client.py +159 -1
  3. aiecs/config/config.py +6 -0
  4. aiecs/domain/__init__.py +95 -0
  5. aiecs/domain/community/__init__.py +159 -0
  6. aiecs/domain/community/agent_adapter.py +516 -0
  7. aiecs/domain/community/analytics.py +465 -0
  8. aiecs/domain/community/collaborative_workflow.py +99 -7
  9. aiecs/domain/community/communication_hub.py +649 -0
  10. aiecs/domain/community/community_builder.py +322 -0
  11. aiecs/domain/community/community_integration.py +365 -12
  12. aiecs/domain/community/community_manager.py +481 -5
  13. aiecs/domain/community/decision_engine.py +459 -13
  14. aiecs/domain/community/exceptions.py +238 -0
  15. aiecs/domain/community/models/__init__.py +36 -0
  16. aiecs/domain/community/resource_manager.py +1 -1
  17. aiecs/domain/community/shared_context_manager.py +621 -0
  18. aiecs/domain/context/__init__.py +24 -0
  19. aiecs/domain/context/context_engine.py +37 -33
  20. aiecs/main.py +20 -2
  21. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  22. aiecs/scripts/aid/__init__.py +15 -0
  23. aiecs/scripts/aid/version_manager.py +224 -0
  24. aiecs/scripts/dependance_check/__init__.py +18 -0
  25. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +51 -8
  26. aiecs/scripts/dependance_patch/__init__.py +8 -0
  27. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +12 -0
  28. aiecs/scripts/tools_develop/README.md +340 -0
  29. aiecs/scripts/tools_develop/__init__.py +16 -0
  30. aiecs/scripts/tools_develop/check_type_annotations.py +263 -0
  31. aiecs/scripts/tools_develop/validate_tool_schemas.py +346 -0
  32. aiecs/tools/__init__.py +53 -34
  33. aiecs/tools/docs/__init__.py +106 -0
  34. aiecs/tools/docs/ai_document_orchestrator.py +556 -0
  35. aiecs/tools/docs/ai_document_writer_orchestrator.py +2222 -0
  36. aiecs/tools/docs/content_insertion_tool.py +1234 -0
  37. aiecs/tools/docs/document_creator_tool.py +1179 -0
  38. aiecs/tools/docs/document_layout_tool.py +1105 -0
  39. aiecs/tools/docs/document_parser_tool.py +924 -0
  40. aiecs/tools/docs/document_writer_tool.py +1636 -0
  41. aiecs/tools/langchain_adapter.py +102 -51
  42. aiecs/tools/schema_generator.py +265 -0
  43. aiecs/tools/statistics/__init__.py +82 -0
  44. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +581 -0
  45. aiecs/tools/statistics/ai_insight_generator_tool.py +473 -0
  46. aiecs/tools/statistics/ai_report_orchestrator_tool.py +629 -0
  47. aiecs/tools/statistics/data_loader_tool.py +518 -0
  48. aiecs/tools/statistics/data_profiler_tool.py +599 -0
  49. aiecs/tools/statistics/data_transformer_tool.py +531 -0
  50. aiecs/tools/statistics/data_visualizer_tool.py +460 -0
  51. aiecs/tools/statistics/model_trainer_tool.py +470 -0
  52. aiecs/tools/statistics/statistical_analyzer_tool.py +426 -0
  53. aiecs/tools/task_tools/chart_tool.py +2 -1
  54. aiecs/tools/task_tools/image_tool.py +43 -43
  55. aiecs/tools/task_tools/office_tool.py +48 -36
  56. aiecs/tools/task_tools/pandas_tool.py +37 -33
  57. aiecs/tools/task_tools/report_tool.py +67 -56
  58. aiecs/tools/task_tools/research_tool.py +32 -31
  59. aiecs/tools/task_tools/scraper_tool.py +53 -46
  60. aiecs/tools/task_tools/search_tool.py +1123 -0
  61. aiecs/tools/task_tools/stats_tool.py +20 -15
  62. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/METADATA +5 -1
  63. aiecs-1.2.0.dist-info/RECORD +135 -0
  64. aiecs-1.2.0.dist-info/entry_points.txt +10 -0
  65. aiecs/tools/task_tools/search_api.py +0 -7
  66. aiecs-1.0.8.dist-info/RECORD +0 -98
  67. aiecs-1.0.8.dist-info/entry_points.txt +0 -7
  68. /aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +0 -0
  69. /aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +0 -0
  70. /aiecs/scripts/{dependency_checker.py → dependance_check/dependency_checker.py} +0 -0
  71. /aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +0 -0
  72. /aiecs/scripts/{quick_dependency_check.py → dependance_check/quick_dependency_check.py} +0 -0
  73. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  74. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  75. /aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +0 -0
  76. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  77. /aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +0 -0
  78. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  79. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/WHEEL +0 -0
  80. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/licenses/LICENSE +0 -0
  81. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,556 @@
1
+ import os
2
+ import asyncio
3
+ import logging
4
+ from typing import Dict, Any, List, Optional, Union, Callable
5
+ from enum import Enum
6
+ from datetime import datetime
7
+
8
+ from pydantic import BaseModel, Field, ValidationError, ConfigDict
9
+
10
+ from aiecs.tools.base_tool import BaseTool
11
+ from aiecs.tools import register_tool
12
+
13
+
14
class ProcessingMode(str, Enum):
    """Kinds of AI processing that can be requested for a document.

    Members mix in ``str``, so each one compares equal to its string value.
    """

    SUMMARIZE = "summarize"
    EXTRACT_INFO = "extract_info"
    ANALYZE = "analyze"
    TRANSLATE = "translate"
    CLASSIFY = "classify"
    ANSWER_QUESTIONS = "answer_questions"
    CUSTOM = "custom"
23
+
24
+
25
class AIProvider(str, Enum):
    """Identifiers of the AI back ends a caller may select.

    Members mix in ``str``, so each one compares equal to its string value.
    """

    OPENAI = "openai"
    VERTEX_AI = "vertex_ai"
    XAI = "xai"
    LOCAL = "local"
31
+
32
+
33
+
34
+
35
class AIDocumentOrchestratorError(Exception):
    """Root of the exception hierarchy raised by the AI Document Orchestrator."""
38
+
39
+
40
class AIProviderError(AIDocumentOrchestratorError):
    """Signals that an operation against an AI provider failed."""
43
+
44
+
45
class ProcessingError(AIDocumentOrchestratorError):
    """Signals that processing a document failed."""
48
+
49
+
50
+ @register_tool("ai_document_orchestrator")
51
+ class AIDocumentOrchestrator(BaseTool):
52
+ """
53
+ AI-powered document processing orchestrator that:
54
+ 1. Coordinates document parsing with AI analysis
55
+ 2. Manages AI provider interactions
56
+ 3. Handles complex document processing workflows
57
+ 4. Provides intelligent content analysis and extraction
58
+
59
+ Integrates with:
60
+ - DocumentParserTool for document parsing
61
+ - Various AI providers for content analysis
62
+ - Existing AIECS infrastructure
63
+ """
64
+
65
+ # Configuration schema
66
class Config(BaseModel):
    """Settings accepted by the AI document orchestrator tool."""

    # NOTE(review): `env_prefix` is normally a pydantic-settings option; on a
    # plain BaseModel it presumably does not load values from the
    # environment - confirm against the pydantic version in use.
    model_config = ConfigDict(env_prefix="AI_DOC_ORCHESTRATOR_")

    # Provider used when a call does not name one.
    default_ai_provider: str = Field(default="openai", description="Default AI provider to use")
    # Upper bound on the text length handed to the AI in one request.
    max_chunk_size: int = Field(default=4000, description="Maximum chunk size for AI processing")
    # Default concurrency limit for batch processing.
    max_concurrent_requests: int = Field(default=5, description="Maximum concurrent AI requests")
    default_temperature: float = Field(default=0.1, description="Default temperature for AI model")
    max_tokens: int = Field(default=2000, description="Maximum tokens for AI response")
    timeout: int = Field(default=60, description="Timeout in seconds for AI operations")
94
+
95
def __init__(self, config: Optional[Dict] = None):
    """Build the orchestrator: base tool, settings, logger, parser, AI client, templates.

    Args:
        config: Optional mapping whose entries are passed to ``Config`` as
            keyword arguments; ``None`` or an empty mapping selects defaults.
    """
    super().__init__(config)

    overrides = config or {}
    self.config = self.Config(**overrides)
    self.logger = logging.getLogger(__name__)

    # The logger is created first because the parser and provider
    # initializers log when their imports fail.
    for initializer in (
        self._init_document_parser,
        self._init_ai_providers,
        self._init_processing_templates,
    ):
        initializer()
112
+
113
def _init_document_parser(self):
    """Create the document parser, or record its absence.

    Sets ``self.document_parser`` to a ``DocumentParserTool`` instance; when
    an ``ImportError`` is raised while importing or constructing it, an
    error is logged and the attribute is set to ``None``.
    """
    try:
        # Imported lazily so a missing parser module does not break import
        # of this module.
        from aiecs.tools.docs.document_parser_tool import DocumentParserTool as parser_cls
        self.document_parser = parser_cls()
    except ImportError:
        self.logger.error("DocumentParserTool not available")
        self.document_parser = None
121
+
122
def _init_ai_providers(self):
    """Populate the provider registry with the AIECS client when available.

    Always resets ``self.ai_providers`` to an empty dict first. On success
    the client is stored both as ``self.aiecs_client`` and under the
    ``"aiecs"`` key; on ``ImportError`` a warning is logged and
    ``self.aiecs_client`` is ``None``.
    """
    self.ai_providers = {}

    try:
        # The AIECS client performs the AI operations for this tool.
        from aiecs import AIECS
        client = AIECS()
        self.aiecs_client = client
        self.ai_providers["aiecs"] = self.aiecs_client
    except ImportError:
        self.logger.warning("AIECS client not available")
        self.aiecs_client = None
134
+
135
def _init_processing_templates(self):
    """Install the built-in prompt templates, one per ``ProcessingMode``.

    Each entry of ``self.processing_templates`` maps a mode to a dict with
    the keys ``"system_prompt"`` and ``"user_prompt_template"``. The user
    templates contain ``{content}`` plus mode-specific placeholders.
    """
    # (mode, system prompt, user prompt template), in registration order.
    template_rows = [
        (
            ProcessingMode.SUMMARIZE,
            "You are an expert document summarizer. Create concise, informative summaries.",
            "Please summarize the following document content:\n\n{content}\n\nProvide a clear, structured summary highlighting the key points.",
        ),
        (
            ProcessingMode.EXTRACT_INFO,
            "You are an expert information extractor. Extract specific information from documents.",
            "Extract the following information from the document:\n{extraction_criteria}\n\nDocument content:\n{content}\n\nProvide the extracted information in a structured format.",
        ),
        (
            ProcessingMode.ANALYZE,
            "You are an expert document analyzer. Provide thorough analysis of document content.",
            "Analyze the following document content and provide insights:\n\n{content}\n\nInclude analysis of:\n- Main themes and topics\n- Key findings\n- Important details\n- Overall structure and organization",
        ),
        (
            ProcessingMode.TRANSLATE,
            "You are an expert translator. Provide accurate translations while preserving meaning and context.",
            "Translate the following document content to {target_language}:\n\n{content}\n\nMaintain the original structure and formatting where possible.",
        ),
        (
            ProcessingMode.CLASSIFY,
            "You are an expert document classifier. Classify documents accurately based on their content.",
            "Classify the following document content into the appropriate categories:\n\nCategories: {categories}\n\nDocument content:\n{content}\n\nProvide the classification with confidence scores and reasoning.",
        ),
        (
            ProcessingMode.ANSWER_QUESTIONS,
            "You are an expert document analyst. Answer questions based on document content accurately.",
            "Based on the following document content, answer these questions:\n\nQuestions:\n{questions}\n\nDocument content:\n{content}\n\nProvide clear, accurate answers with references to the relevant parts of the document.",
        ),
        (
            ProcessingMode.CUSTOM,
            "You are an expert document analyst. Follow the custom instructions provided.",
            "{custom_prompt}\n\nDocument content:\n{content}\n\nPlease provide your analysis based on the custom instructions above.",
        ),
    ]

    self.processing_templates = {
        mode: {"system_prompt": system_text, "user_prompt_template": user_text}
        for mode, system_text, user_text in template_rows
    }
167
+
168
+ # Schema definitions
169
class ProcessDocumentSchema(BaseModel):
    """Input model for the ``process_document`` operation."""

    # Required inputs.
    source: str = Field(description="URL or file path to the document")
    processing_mode: ProcessingMode = Field(description="AI processing mode to apply")

    # Optional inputs; all default to None.
    ai_provider: Optional[AIProvider] = Field(
        default=None,
        description="AI provider to use",
    )
    processing_params: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Additional processing parameters",
    )
    parse_params: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Document parsing parameters",
    )
    ai_params: Optional[Dict[str, Any]] = Field(
        default=None,
        description="AI provider parameters",
    )
177
+
178
class BatchProcessSchema(BaseModel):
    """Input model for the ``batch_process_documents`` operation."""

    # Required inputs.
    sources: List[str] = Field(description="List of URLs or file paths")
    processing_mode: ProcessingMode = Field(description="AI processing mode to apply")

    # Optional inputs; all default to None.
    ai_provider: Optional[AIProvider] = Field(
        default=None,
        description="AI provider to use",
    )
    processing_params: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Additional processing parameters",
    )
    max_concurrent: Optional[int] = Field(
        default=None,
        description="Maximum concurrent processing",
    )
185
+
186
class AnalyzeDocumentSchema(BaseModel):
    """Input model for the ``analyze_document`` operation (AI-first approach)."""

    # Required inputs.
    source: str = Field(description="URL or file path to the document")
    analysis_type: str = Field(description="Type of analysis to perform")

    # Optional inputs; both default to None.
    custom_prompt: Optional[str] = Field(
        default=None,
        description="Custom AI prompt for analysis",
    )
    ai_provider: Optional[AIProvider] = Field(
        default=None,
        description="AI provider to use",
    )
192
+
193
def process_document(self,
                     source: str,
                     processing_mode: ProcessingMode,
                     ai_provider: Optional[AIProvider] = None,
                     processing_params: Optional[Dict[str, Any]] = None,
                     parse_params: Optional[Dict[str, Any]] = None,
                     ai_params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Process a document using AI with intelligent orchestration

    The document is parsed, its content is prepared for the AI, the AI is
    called with the template for ``processing_mode``, and the outcome is
    assembled into a result dict that is finally post-processed.

    Args:
        source: URL or file path to document
        processing_mode: AI processing mode to apply
        ai_provider: AI provider to use; falls back to
            ``self.config.default_ai_provider`` when falsy
        processing_params: Additional processing parameters (used to fill
            prompt placeholders and passed to post-processing)
        parse_params: Document parsing parameters
        ai_params: AI provider parameters

    Returns:
        Dict with the keys ``source``, ``processing_mode``, ``ai_provider``,
        ``document_info``, ``ai_result`` and ``processing_metadata``, plus
        whatever post-processing adds.

    Raises:
        ProcessingError: If any step fails; the original exception is
            attached as ``__cause__``.
    """
    try:
        start_time = datetime.now()

        # Resolve optional arguments once so every step sees the same values.
        provider = ai_provider or self.config.default_ai_provider
        extra_params = processing_params or {}

        # Step 1: Parse the document
        self.logger.info(f"Starting document processing: {source}")
        parsed_result = self._parse_document(source, parse_params or {})

        # Step 2: Prepare content for AI processing
        content = self._prepare_content_for_ai(parsed_result, processing_mode)

        # Step 3: Process with AI
        ai_result = self._process_with_ai(
            content,
            processing_mode,
            provider,
            extra_params,
            ai_params or {}
        )

        # Take one end timestamp so end_time and processing_duration agree.
        end_time = datetime.now()

        # Step 4: Combine results
        result = {
            "source": source,
            "processing_mode": processing_mode,
            "ai_provider": provider,
            "document_info": {
                "type": parsed_result.get("document_type"),
                "detection_confidence": parsed_result.get("detection_confidence"),
                "content_stats": parsed_result.get("content_stats")
            },
            "ai_result": ai_result,
            "processing_metadata": {
                "start_time": start_time.isoformat(),
                "end_time": end_time.isoformat(),
                "processing_duration": (end_time - start_time).total_seconds()
            }
        }

        # Step 5: Post-process if needed
        return self._post_process_result(result, processing_mode, extra_params)

    except Exception as e:
        # Chain explicitly so the root cause stays attached to the wrapper.
        raise ProcessingError(f"Document processing failed: {str(e)}") from e
258
+
259
async def process_document_async(self,
                                 source: str,
                                 processing_mode: ProcessingMode,
                                 ai_provider: Optional[AIProvider] = None,
                                 processing_params: Optional[Dict[str, Any]] = None,
                                 parse_params: Optional[Dict[str, Any]] = None,
                                 ai_params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Awaitable wrapper that runs ``process_document`` in a worker thread.

    All arguments are forwarded unchanged, by keyword, to
    ``process_document``; its return value is returned.
    """
    forwarded = {
        "source": source,
        "processing_mode": processing_mode,
        "ai_provider": ai_provider,
        "processing_params": processing_params,
        "parse_params": parse_params,
        "ai_params": ai_params,
    }
    outcome = await asyncio.to_thread(self.process_document, **forwarded)
    return outcome
276
+
277
def batch_process_documents(self,
                            sources: List[str],
                            processing_mode: ProcessingMode,
                            ai_provider: Optional[AIProvider] = None,
                            processing_params: Optional[Dict[str, Any]] = None,
                            max_concurrent: Optional[int] = None) -> Dict[str, Any]:
    """
    Process multiple documents in batch with intelligent orchestration

    This is a blocking call. When the calling thread already runs an
    asyncio event loop, the batch is executed on a private event loop in a
    helper thread (``asyncio.run`` cannot be nested) and the caller blocks
    until it finishes.

    Args:
        sources: List of URLs or file paths
        processing_mode: AI processing mode to apply
        ai_provider: AI provider to use
        processing_params: Additional processing parameters
        max_concurrent: Maximum concurrent processing; falls back to
            ``self.config.max_concurrent_requests`` when falsy

    Returns:
        Dict containing the per-document results under ``results`` together
        with success/failure counts and timing metadata

    Raises:
        ProcessingError: If the batch as a whole fails; the original
            exception is attached as ``__cause__``. Failures of individual
            documents are reported inside ``results`` instead.
    """
    import concurrent.futures

    try:
        start_time = datetime.now()
        max_concurrent = max_concurrent or self.config.max_concurrent_requests

        def run_batch() -> List[Dict[str, Any]]:
            # The coroutine is created here so it always belongs to the
            # loop that asyncio.run starts.
            return asyncio.run(self._batch_process_async(
                sources, processing_mode, ai_provider, processing_params, max_concurrent
            ))

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: asyncio.run can be used directly.
            results = run_batch()
        else:
            # A loop is already running here; asyncio.run would raise, so
            # use a fresh loop in a separate thread.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                results = executor.submit(run_batch).result()

        # One end timestamp keeps end_time and total_duration consistent.
        end_time = datetime.now()

        # Aggregate results
        succeeded = sum(1 for r in results if r.get("status") == "success")
        failed = sum(1 for r in results if r.get("status") == "error")

        return {
            "sources": sources,
            "processing_mode": processing_mode,
            "ai_provider": ai_provider or self.config.default_ai_provider,
            "total_documents": len(sources),
            "successful_documents": succeeded,
            "failed_documents": failed,
            "results": results,
            "batch_metadata": {
                "start_time": start_time.isoformat(),
                "end_time": end_time.isoformat(),
                "total_duration": (end_time - start_time).total_seconds()
            }
        }

    except Exception as e:
        raise ProcessingError(f"Batch processing failed: {str(e)}") from e
325
+
326
def analyze_document(self,
                     source: str,
                     analysis_type: str,
                     custom_prompt: Optional[str] = None,
                     ai_provider: Optional[AIProvider] = None) -> Dict[str, Any]:
    """
    Perform AI-first document analysis

    Args:
        source: URL or file path to document
        analysis_type: Type of analysis to perform
        custom_prompt: Custom AI prompt for analysis. May contain the
            placeholders ``{content}`` and ``{analysis_type}``. It is first
            rendered with ``str.format``; if that fails (for example
            because the prompt contains literal braces such as a JSON
            example), only those two placeholders are substituted
            literally and every other brace is left untouched.
        ai_provider: AI provider to use; falls back to
            ``self.config.default_ai_provider`` when falsy

    Returns:
        Dict containing analysis results

    Raises:
        ProcessingError: If parsing or the AI call fails; the original
            exception is attached as ``__cause__``.
    """
    try:
        # Parse document first
        parsed_result = self._parse_document(source, {})
        content = parsed_result.get("content", "")

        # Prepare AI prompt
        if custom_prompt:
            try:
                prompt = custom_prompt.format(content=content, analysis_type=analysis_type)
            except (KeyError, IndexError, ValueError):
                # The prompt has braces that are not valid format fields;
                # fall back to plain substitution instead of failing.
                prompt = (custom_prompt
                          .replace("{content}", str(content))
                          .replace("{analysis_type}", str(analysis_type)))
        else:
            prompt = f"Perform {analysis_type} analysis on the following document:\n\n{content}"

        # Process with AI
        ai_result = self._call_ai_provider(
            prompt,
            ai_provider or self.config.default_ai_provider,
            {}
        )

        return {
            "source": source,
            "analysis_type": analysis_type,
            "document_info": {
                "type": parsed_result.get("document_type"),
                "content_stats": parsed_result.get("content_stats")
            },
            "analysis_result": ai_result,
            "timestamp": datetime.now().isoformat()
        }

    except Exception as e:
        raise ProcessingError(f"Document analysis failed: {str(e)}") from e
374
+
375
+ def _parse_document(self, source: str, parse_params: Dict[str, Any]) -> Dict[str, Any]:
376
+ """Parse document using DocumentParserTool"""
377
+ if not self.document_parser:
378
+ raise ProcessingError("DocumentParserTool not available")
379
+
380
+ try:
381
+ return self.document_parser.parse_document(source, **parse_params)
382
+ except Exception as e:
383
+ raise ProcessingError(f"Document parsing failed: {str(e)}")
384
+
385
+ def _prepare_content_for_ai(self, parsed_result: Dict[str, Any], processing_mode: ProcessingMode) -> str:
386
+ """Prepare parsed content for AI processing"""
387
+ content = parsed_result.get("content", "")
388
+
389
+ if isinstance(content, dict):
390
+ # Extract text from structured content
391
+ text_content = content.get("text", str(content))
392
+ else:
393
+ text_content = str(content)
394
+
395
+ # Chunk content if too large
396
+ max_size = self.config.max_chunk_size
397
+ if len(text_content) > max_size:
398
+ # For now, truncate - could implement smart chunking
399
+ text_content = text_content[:max_size] + "\n\n[Content truncated...]"
400
+
401
+ return text_content
402
+
403
+ def _process_with_ai(self,
404
+ content: str,
405
+ processing_mode: ProcessingMode,
406
+ ai_provider: AIProvider,
407
+ processing_params: Dict[str, Any],
408
+ ai_params: Dict[str, Any]) -> Dict[str, Any]:
409
+ """Process content with AI based on processing mode"""
410
+ try:
411
+ # Get processing template
412
+ template = self.processing_templates.get(processing_mode)
413
+ if not template:
414
+ raise ProcessingError(f"No template found for processing mode: {processing_mode}")
415
+
416
+ # Format prompt
417
+ prompt = self._format_prompt(template, content, processing_params)
418
+
419
+ # Call AI provider
420
+ ai_result = self._call_ai_provider(prompt, ai_provider, ai_params)
421
+
422
+ return {
423
+ "processing_mode": processing_mode,
424
+ "prompt_used": prompt,
425
+ "ai_response": ai_result,
426
+ "ai_provider": ai_provider
427
+ }
428
+
429
+ except Exception as e:
430
+ raise AIProviderError(f"AI processing failed: {str(e)}")
431
+
432
+ def _format_prompt(self, template: Dict[str, str], content: str, params: Dict[str, Any]) -> str:
433
+ """Format AI prompt using template and parameters"""
434
+ user_prompt = template["user_prompt_template"]
435
+
436
+ # Replace content placeholder
437
+ formatted_prompt = user_prompt.replace("{content}", content)
438
+
439
+ # Replace other parameters
440
+ for key, value in params.items():
441
+ placeholder = f"{{{key}}}"
442
+ if placeholder in formatted_prompt:
443
+ formatted_prompt = formatted_prompt.replace(placeholder, str(value))
444
+
445
+ return formatted_prompt
446
+
447
+ def _call_ai_provider(self, prompt: str, ai_provider: AIProvider, ai_params: Dict[str, Any]) -> str:
448
+ """Call AI provider with prompt"""
449
+ try:
450
+ if self.aiecs_client:
451
+ # Use AIECS client for AI operations
452
+ from aiecs.domain.task.task_context import TaskContext
453
+
454
+ task_context = TaskContext(
455
+ task_id=f"doc_processing_{datetime.now().timestamp()}",
456
+ task_type="document_processing",
457
+ input_data={"prompt": prompt},
458
+ metadata=ai_params
459
+ )
460
+
461
+ # This would need to be adapted based on actual AIECS API
462
+ result = self.aiecs_client.process_task(task_context)
463
+ return result.get("response", "")
464
+ else:
465
+ # Fallback to direct AI provider calls
466
+ return self._direct_ai_call(prompt, ai_provider, ai_params)
467
+
468
+ except Exception as e:
469
+ raise AIProviderError(f"AI provider call failed: {str(e)}")
470
+
471
+ def _direct_ai_call(self, prompt: str, ai_provider: AIProvider, ai_params: Dict[str, Any]) -> str:
472
+ """Direct AI provider call (fallback)"""
473
+ # This is a placeholder for direct AI provider integration
474
+ # In a real implementation, you would integrate with specific AI APIs
475
+ self.logger.warning("Using mock AI response - implement actual AI provider integration")
476
+ return f"Mock AI response for prompt: {prompt[:100]}..."
477
+
478
+ async def _batch_process_async(self,
479
+ sources: List[str],
480
+ processing_mode: ProcessingMode,
481
+ ai_provider: Optional[AIProvider],
482
+ processing_params: Optional[Dict[str, Any]],
483
+ max_concurrent: int) -> List[Dict[str, Any]]:
484
+ """Process documents in parallel with concurrency control"""
485
+ semaphore = asyncio.Semaphore(max_concurrent)
486
+
487
+ async def process_single(source: str) -> Dict[str, Any]:
488
+ async with semaphore:
489
+ try:
490
+ result = await self.process_document_async(
491
+ source=source,
492
+ processing_mode=processing_mode,
493
+ ai_provider=ai_provider,
494
+ processing_params=processing_params
495
+ )
496
+ return {"source": source, "status": "success", "result": result}
497
+ except Exception as e:
498
+ return {"source": source, "status": "error", "error": str(e)}
499
+
500
+ tasks = [process_single(source) for source in sources]
501
+ return await asyncio.gather(*tasks)
502
+
503
def _post_process_result(self, result: Dict[str, Any], processing_mode: ProcessingMode, params: Dict[str, Any]) -> Dict[str, Any]:
    """Attach mode-specific extras to ``result`` and return it.

    ``EXTRACT_INFO`` results gain a ``"validation"`` entry and ``CLASSIFY``
    results gain a ``"confidence_analysis"`` entry; other modes are
    returned unchanged. The dict is modified in place.
    """
    if processing_mode == ProcessingMode.EXTRACT_INFO:
        # Validate extracted information
        result["validation"] = self._validate_extracted_info(result, params)
        return result

    if processing_mode == ProcessingMode.CLASSIFY:
        # Add confidence scoring
        result["confidence_analysis"] = self._analyze_classification_confidence(result)

    return result
516
+
517
+ def _validate_extracted_info(self, result: Dict[str, Any], params: Dict[str, Any]) -> Dict[str, str]:
518
+ """Validate extracted information"""
519
+ # Placeholder for validation logic
520
+ return {"status": "validated", "notes": "Validation completed"}
521
+
522
+ def _analyze_classification_confidence(self, result: Dict[str, Any]) -> Dict[str, Any]:
523
+ """Analyze classification confidence"""
524
+ # Placeholder for confidence analysis
525
+ return {"overall_confidence": 0.85, "factors": ["content_quality", "model_certainty"]}
526
+
527
+ # Utility methods for custom processing
528
def create_custom_processor(self,
                            system_prompt: str,
                            user_prompt_template: str) -> Callable:
    """Create a custom processing function

    The returned callable processes one document in ``CUSTOM`` mode using
    the given prompts. The prompts are installed as the ``CUSTOM`` template
    only for the duration of each call; afterwards the previously
    registered ``CUSTOM`` template is restored, so plain ``CUSTOM``-mode
    calls and other custom processors are not affected.

    NOTE: the template is swapped on the shared ``processing_templates``
    dict, so concurrent calls from several threads can still interfere.

    Args:
        system_prompt: System prompt stored in the temporary template.
        user_prompt_template: User prompt template; may use ``{content}``
            and any placeholder supplied as a keyword argument at call time.

    Returns:
        ``custom_processor(source, **kwargs)``; ``kwargs`` are passed to
        ``process_document`` as ``processing_params`` and its result is
        returned.
    """
    custom_template = {
        "system_prompt": system_prompt,
        "user_prompt_template": user_prompt_template
    }

    def custom_processor(source: str, **kwargs) -> Dict[str, Any]:
        templates = self.processing_templates
        had_previous = ProcessingMode.CUSTOM in templates
        previous = templates.get(ProcessingMode.CUSTOM)

        # Install a copy so later mutation cannot leak between calls.
        templates[ProcessingMode.CUSTOM] = dict(custom_template)
        try:
            return self.process_document(
                source=source,
                processing_mode=ProcessingMode.CUSTOM,
                processing_params=kwargs
            )
        finally:
            # Put back whatever was registered before this call.
            if had_previous:
                templates[ProcessingMode.CUSTOM] = previous
            else:
                templates.pop(ProcessingMode.CUSTOM, None)

    return custom_processor
546
+
547
def get_processing_stats(self) -> Dict[str, Any]:
    """Return processing statistics.

    Statistics are not tracked yet, so this always returns the same
    placeholder values.
    """
    placeholder_stats = dict(
        total_documents_processed=0,
        average_processing_time=0,
        success_rate=1.0,
        most_common_document_types=[],
        ai_provider_usage={},
    )
    return placeholder_stats