aiecs-1.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302)
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
aiecs/tools/docs/ai_document_orchestrator.py
@@ -0,0 +1,599 @@
+ import asyncio
+ import logging
+ from typing import Dict, Any, List, Optional, Callable
+ from enum import Enum
+ from datetime import datetime
+
+ from pydantic import BaseModel, Field, ConfigDict
+
+ from aiecs.tools.base_tool import BaseTool
+ from aiecs.tools import register_tool
+
+
+ class ProcessingMode(str, Enum):
+     """AI document processing modes"""
+
+     SUMMARIZE = "summarize"
+     EXTRACT_INFO = "extract_info"
+     ANALYZE = "analyze"
+     TRANSLATE = "translate"
+     CLASSIFY = "classify"
+     ANSWER_QUESTIONS = "answer_questions"
+     CUSTOM = "custom"
+
+
+ class AIProvider(str, Enum):
+     """Supported AI providers"""
+
+     OPENAI = "openai"
+     VERTEX_AI = "vertex_ai"
+     XAI = "xai"
+     LOCAL = "local"
+
+
+ class AIDocumentOrchestratorError(Exception):
+     """Base exception for AI Document Orchestrator errors"""
+
+
+ class AIProviderError(AIDocumentOrchestratorError):
+     """Raised when AI provider operations fail"""
+
+
+ class ProcessingError(AIDocumentOrchestratorError):
+     """Raised when document processing fails"""
+
+
+ @register_tool("ai_document_orchestrator")
+ class AIDocumentOrchestrator(BaseTool):
+     """
+     AI-powered document processing orchestrator that:
+     1. Coordinates document parsing with AI analysis
+     2. Manages AI provider interactions
+     3. Handles complex document processing workflows
+     4. Provides intelligent content analysis and extraction
+
+     Integrates with:
+     - DocumentParserTool for document parsing
+     - Various AI providers for content analysis
+     - Existing AIECS infrastructure
+     """
+
+     # Configuration schema
+     class Config(BaseModel):
+         """Configuration for the AI document orchestrator tool"""
+
+         model_config = ConfigDict(env_prefix="AI_DOC_ORCHESTRATOR_")
+
+         default_ai_provider: str = Field(default="openai", description="Default AI provider to use")
+         max_chunk_size: int = Field(
+             default=4000, description="Maximum chunk size for AI processing"
+         )
+         max_concurrent_requests: int = Field(
+             default=5, description="Maximum concurrent AI requests"
+         )
+         default_temperature: float = Field(
+             default=0.1, description="Default temperature for AI model"
+         )
+         max_tokens: int = Field(default=2000, description="Maximum tokens for AI response")
+         timeout: int = Field(default=60, description="Timeout in seconds for AI operations")
+
+     def __init__(self, config: Optional[Dict] = None):
+         """Initialize AI Document Orchestrator with settings"""
+         super().__init__(config)
+
+         # Parse configuration
+         self.config = self.Config(**(config or {}))
+
+         self.logger = logging.getLogger(__name__)
+
+         # Initialize document parser
+         self._init_document_parser()
+
+         # Initialize AI providers
+         self._init_ai_providers()
+
+         # Processing templates
+         self._init_processing_templates()
+
+     def _init_document_parser(self):
+         """Initialize document parser tool"""
+         try:
+             from aiecs.tools.docs.document_parser_tool import (
+                 DocumentParserTool,
+             )
+
+             self.document_parser = DocumentParserTool()
+         except ImportError:
+             self.logger.error("DocumentParserTool not available")
+             self.document_parser = None
+
+     def _init_ai_providers(self):
+         """Initialize AI providers"""
+         self.ai_providers = {}
+
+         try:
+             # Initialize AIECS client for AI operations
+             from aiecs import AIECS
+
+             self.aiecs_client = AIECS()
+             self.ai_providers["aiecs"] = self.aiecs_client
+         except ImportError:
+             self.logger.warning("AIECS client not available")
+             self.aiecs_client = None
+
+     def _init_processing_templates(self):
+         """Initialize processing templates for different AI tasks"""
+         self.processing_templates = {
+             ProcessingMode.SUMMARIZE: {
+                 "system_prompt": "You are an expert document summarizer. Create concise, informative summaries.",
+                 "user_prompt_template": "Please summarize the following document content:\n\n{content}\n\nProvide a clear, structured summary highlighting the key points.",
+             },
+             ProcessingMode.EXTRACT_INFO: {
+                 "system_prompt": "You are an expert information extractor. Extract specific information from documents.",
+                 "user_prompt_template": "Extract the following information from the document:\n{extraction_criteria}\n\nDocument content:\n{content}\n\nProvide the extracted information in a structured format.",
+             },
+             ProcessingMode.ANALYZE: {
+                 "system_prompt": "You are an expert document analyzer. Provide thorough analysis of document content.",
+                 "user_prompt_template": "Analyze the following document content and provide insights:\n\n{content}\n\nInclude analysis of:\n- Main themes and topics\n- Key findings\n- Important details\n- Overall structure and organization",
+             },
+             ProcessingMode.TRANSLATE: {
+                 "system_prompt": "You are an expert translator. Provide accurate translations while preserving meaning and context.",
+                 "user_prompt_template": "Translate the following document content to {target_language}:\n\n{content}\n\nMaintain the original structure and formatting where possible.",
+             },
+             ProcessingMode.CLASSIFY: {
+                 "system_prompt": "You are an expert document classifier. Classify documents accurately based on their content.",
+                 "user_prompt_template": "Classify the following document content into the appropriate categories:\n\nCategories: {categories}\n\nDocument content:\n{content}\n\nProvide the classification with confidence scores and reasoning.",
+             },
+             ProcessingMode.ANSWER_QUESTIONS: {
+                 "system_prompt": "You are an expert document analyst. Answer questions based on document content accurately.",
+                 "user_prompt_template": "Based on the following document content, answer these questions:\n\nQuestions:\n{questions}\n\nDocument content:\n{content}\n\nProvide clear, accurate answers with references to the relevant parts of the document.",
+             },
+             ProcessingMode.CUSTOM: {
+                 "system_prompt": "You are an expert document analyst. Follow the custom instructions provided.",
+                 "user_prompt_template": "{custom_prompt}\n\nDocument content:\n{content}\n\nPlease provide your analysis based on the custom instructions above.",
+             },
+         }
+
+     # Schema definitions
+     class ProcessDocumentSchema(BaseModel):
+         """Schema for process_document operation"""
+
+         source: str = Field(description="URL or file path to the document")
+         processing_mode: ProcessingMode = Field(description="AI processing mode to apply")
+         ai_provider: Optional[AIProvider] = Field(default=None, description="AI provider to use")
+         processing_params: Optional[Dict[str, Any]] = Field(
+             default=None, description="Additional processing parameters"
+         )
+         parse_params: Optional[Dict[str, Any]] = Field(
+             default=None, description="Document parsing parameters"
+         )
+         ai_params: Optional[Dict[str, Any]] = Field(
+             default=None, description="AI provider parameters"
+         )
+
+     class BatchProcessSchema(BaseModel):
+         """Schema for batch_process_documents operation"""
+
+         sources: List[str] = Field(description="List of URLs or file paths")
+         processing_mode: ProcessingMode = Field(description="AI processing mode to apply")
+         ai_provider: Optional[AIProvider] = Field(default=None, description="AI provider to use")
+         processing_params: Optional[Dict[str, Any]] = Field(
+             default=None, description="Additional processing parameters"
+         )
+         max_concurrent: Optional[int] = Field(
+             default=None, description="Maximum concurrent processing"
+         )
+
+     class AnalyzeDocumentSchema(BaseModel):
+         """Schema for analyze_document operation (AI-first approach)"""
+
+         source: str = Field(description="URL or file path to the document")
+         analysis_type: str = Field(description="Type of analysis to perform")
+         custom_prompt: Optional[str] = Field(
+             default=None, description="Custom AI prompt for analysis"
+         )
+         ai_provider: Optional[AIProvider] = Field(default=None, description="AI provider to use")
+
+     def process_document(
+         self,
+         source: str,
+         processing_mode: ProcessingMode,
+         ai_provider: Optional[AIProvider] = None,
+         processing_params: Optional[Dict[str, Any]] = None,
+         parse_params: Optional[Dict[str, Any]] = None,
+         ai_params: Optional[Dict[str, Any]] = None,
+     ) -> Dict[str, Any]:
+         """
+         Process a document using AI with intelligent orchestration
+
+         Args:
+             source: URL or file path to document
+             processing_mode: AI processing mode to apply
+             ai_provider: AI provider to use (optional)
+             processing_params: Additional processing parameters
+             parse_params: Document parsing parameters
+             ai_params: AI provider parameters
+
+         Returns:
+             Dict containing processed results and metadata
+         """
+         try:
+             start_time = datetime.now()
+
+             # Step 1: Parse the document
+             self.logger.info(f"Starting document processing: {source}")
+             parsed_result = self._parse_document(source, parse_params or {})
+
+             # Step 2: Prepare content for AI processing
+             content = self._prepare_content_for_ai(parsed_result, processing_mode)
+
+             # Step 3: Process with AI
+             ai_result = self._process_with_ai(
+                 content,
+                 processing_mode,
+                 ai_provider or self.config.default_ai_provider,
+                 processing_params or {},
+                 ai_params or {},
+             )
+
+             # Step 4: Combine results
+             result = {
+                 "source": source,
+                 "processing_mode": processing_mode,
+                 "ai_provider": ai_provider or self.config.default_ai_provider,
+                 "document_info": {
+                     "type": parsed_result.get("document_type"),
+                     "detection_confidence": parsed_result.get("detection_confidence"),
+                     "content_stats": parsed_result.get("content_stats"),
+                 },
+                 "ai_result": ai_result,
+                 "processing_metadata": {
+                     "start_time": start_time.isoformat(),
+                     "end_time": datetime.now().isoformat(),
+                     "processing_duration": (datetime.now() - start_time).total_seconds(),
+                 },
+             }
+
+             # Step 5: Post-process if needed
+             result = self._post_process_result(result, processing_mode, processing_params or {})
+
+             return result
+
+         except Exception as e:
+             raise ProcessingError(f"Document processing failed: {str(e)}")
+
+     async def process_document_async(
+         self,
+         source: str,
+         processing_mode: ProcessingMode,
+         ai_provider: Optional[AIProvider] = None,
+         processing_params: Optional[Dict[str, Any]] = None,
+         parse_params: Optional[Dict[str, Any]] = None,
+         ai_params: Optional[Dict[str, Any]] = None,
+     ) -> Dict[str, Any]:
+         """Async version of process_document"""
+         return await asyncio.to_thread(
+             self.process_document,
+             source=source,
+             processing_mode=processing_mode,
+             ai_provider=ai_provider,
+             processing_params=processing_params,
+             parse_params=parse_params,
+             ai_params=ai_params,
+         )
+
+     def batch_process_documents(
+         self,
+         sources: List[str],
+         processing_mode: ProcessingMode,
+         ai_provider: Optional[AIProvider] = None,
+         processing_params: Optional[Dict[str, Any]] = None,
+         max_concurrent: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         """
+         Process multiple documents in batch with intelligent orchestration
+
+         Args:
+             sources: List of URLs or file paths
+             processing_mode: AI processing mode to apply
+             ai_provider: AI provider to use
+             processing_params: Additional processing parameters
+             max_concurrent: Maximum concurrent processing
+
+         Returns:
+             Dict containing batch processing results
+         """
+         try:
+             start_time = datetime.now()
+             max_concurrent = max_concurrent or self.config.max_concurrent_requests
+
+             # Process documents in batches
+             results = asyncio.run(
+                 self._batch_process_async(
+                     sources,
+                     processing_mode,
+                     ai_provider,
+                     processing_params,
+                     max_concurrent,
+                 )
+             )
+
+             # Aggregate results
+             batch_result = {
+                 "sources": sources,
+                 "processing_mode": processing_mode,
+                 "ai_provider": ai_provider or self.config.default_ai_provider,
+                 "total_documents": len(sources),
+                 "successful_documents": len([r for r in results if r.get("status") == "success"]),
+                 "failed_documents": len([r for r in results if r.get("status") == "error"]),
+                 "results": results,
+                 "batch_metadata": {
+                     "start_time": start_time.isoformat(),
+                     "end_time": datetime.now().isoformat(),
+                     "total_duration": (datetime.now() - start_time).total_seconds(),
+                 },
+             }
+
+             return batch_result
+
+         except Exception as e:
+             raise ProcessingError(f"Batch processing failed: {str(e)}")
+
+     def analyze_document(
+         self,
+         source: str,
+         analysis_type: str,
+         custom_prompt: Optional[str] = None,
+         ai_provider: Optional[AIProvider] = None,
+     ) -> Dict[str, Any]:
+         """
+         Perform AI-first document analysis
+
+         Args:
+             source: URL or file path to document
+             analysis_type: Type of analysis to perform
+             custom_prompt: Custom AI prompt for analysis
+             ai_provider: AI provider to use
+
+         Returns:
+             Dict containing analysis results
+         """
+         try:
+             # Parse document first
+             parsed_result = self._parse_document(source, {})
+             content = parsed_result.get("content", "")
+
+             # Prepare AI prompt
+             if custom_prompt:
+                 prompt = custom_prompt.format(content=content, analysis_type=analysis_type)
+             else:
+                 prompt = f"Perform {analysis_type} analysis on the following document:\n\n{content}"
+
+             # Process with AI
+             ai_result = self._call_ai_provider(
+                 prompt, ai_provider or self.config.default_ai_provider, {}
+             )
+
+             return {
+                 "source": source,
+                 "analysis_type": analysis_type,
+                 "document_info": {
+                     "type": parsed_result.get("document_type"),
+                     "content_stats": parsed_result.get("content_stats"),
+                 },
+                 "analysis_result": ai_result,
+                 "timestamp": datetime.now().isoformat(),
+             }
+
+         except Exception as e:
+             raise ProcessingError(f"Document analysis failed: {str(e)}")
+
+     def _parse_document(self, source: str, parse_params: Dict[str, Any]) -> Dict[str, Any]:
+         """Parse document using DocumentParserTool"""
+         if not self.document_parser:
+             raise ProcessingError("DocumentParserTool not available")
+
+         try:
+             return self.document_parser.parse_document(source, **parse_params)
+         except Exception as e:
+             raise ProcessingError(f"Document parsing failed: {str(e)}")
+
+     def _prepare_content_for_ai(
+         self, parsed_result: Dict[str, Any], processing_mode: ProcessingMode
+     ) -> str:
+         """Prepare parsed content for AI processing"""
+         content = parsed_result.get("content", "")
+
+         if isinstance(content, dict):
+             # Extract text from structured content
+             text_content = content.get("text", str(content))
+         else:
+             text_content = str(content)
+
+         # Chunk content if too large
+         max_size = self.config.max_chunk_size
+         if len(text_content) > max_size:
+             # For now, truncate - could implement smart chunking
+             text_content = text_content[:max_size] + "\n\n[Content truncated...]"
+
+         return text_content
+
+     def _process_with_ai(
+         self,
+         content: str,
+         processing_mode: ProcessingMode,
+         ai_provider: AIProvider,
+         processing_params: Dict[str, Any],
+         ai_params: Dict[str, Any],
+     ) -> Dict[str, Any]:
+         """Process content with AI based on processing mode"""
+         try:
+             # Get processing template
+             template = self.processing_templates.get(processing_mode)
+             if not template:
+                 raise ProcessingError(f"No template found for processing mode: {processing_mode}")
+
+             # Format prompt
+             prompt = self._format_prompt(template, content, processing_params)
+
+             # Call AI provider
+             ai_result = self._call_ai_provider(prompt, ai_provider, ai_params)
+
+             return {
+                 "processing_mode": processing_mode,
+                 "prompt_used": prompt,
+                 "ai_response": ai_result,
+                 "ai_provider": ai_provider,
+             }
+
+         except Exception as e:
+             raise AIProviderError(f"AI processing failed: {str(e)}")
+
+     def _format_prompt(self, template: Dict[str, str], content: str, params: Dict[str, Any]) -> str:
+         """Format AI prompt using template and parameters"""
+         user_prompt = template["user_prompt_template"]
+
+         # Replace content placeholder
+         formatted_prompt = user_prompt.replace("{content}", content)
+
+         # Replace other parameters
+         for key, value in params.items():
+             placeholder = f"{{{key}}}"
+             if placeholder in formatted_prompt:
+                 formatted_prompt = formatted_prompt.replace(placeholder, str(value))
+
+         return formatted_prompt
+
+     def _call_ai_provider(
+         self, prompt: str, ai_provider: AIProvider, ai_params: Dict[str, Any]
+     ) -> str:
+         """Call AI provider with prompt"""
+         try:
+             if self.aiecs_client:
+                 # Use AIECS client for AI operations
+                 from aiecs.domain.task.task_context import TaskContext
+
+                 task_context = TaskContext(
+                     task_id=f"doc_processing_{datetime.now().timestamp()}",
+                     task_type="document_processing",
+                     input_data={"prompt": prompt},
+                     metadata=ai_params,
+                 )
+
+                 # This would need to be adapted based on actual AIECS API
+                 result = self.aiecs_client.process_task(task_context)
+                 return result.get("response", "")
+             else:
+                 # Fallback to direct AI provider calls
+                 return self._direct_ai_call(prompt, ai_provider, ai_params)
+
+         except Exception as e:
+             raise AIProviderError(f"AI provider call failed: {str(e)}")
+
+     def _direct_ai_call(
+         self, prompt: str, ai_provider: AIProvider, ai_params: Dict[str, Any]
+     ) -> str:
+         """Direct AI provider call (fallback)"""
+         # This is a placeholder for direct AI provider integration
+         # In a real implementation, you would integrate with specific AI APIs
+         self.logger.warning("Using mock AI response - implement actual AI provider integration")
+         return f"Mock AI response for prompt: {prompt[:100]}..."
+
+     async def _batch_process_async(
+         self,
+         sources: List[str],
+         processing_mode: ProcessingMode,
+         ai_provider: Optional[AIProvider],
+         processing_params: Optional[Dict[str, Any]],
+         max_concurrent: int,
+     ) -> List[Dict[str, Any]]:
+         """Process documents in parallel with concurrency control"""
+         semaphore = asyncio.Semaphore(max_concurrent)
+
+         async def process_single(source: str) -> Dict[str, Any]:
+             async with semaphore:
+                 try:
+                     result = await self.process_document_async(
+                         source=source,
+                         processing_mode=processing_mode,
+                         ai_provider=ai_provider,
+                         processing_params=processing_params,
+                     )
+                     return {
+                         "source": source,
+                         "status": "success",
+                         "result": result,
+                     }
+                 except Exception as e:
+                     return {
+                         "source": source,
+                         "status": "error",
+                         "error": str(e),
+                     }
+
+         tasks = [process_single(source) for source in sources]
+         return await asyncio.gather(*tasks)
+
+     def _post_process_result(
+         self,
+         result: Dict[str, Any],
+         processing_mode: ProcessingMode,
+         params: Dict[str, Any],
+     ) -> Dict[str, Any]:
+         """Post-process results based on processing mode"""
+         # Add any post-processing logic here
+         # For example, formatting, validation, additional analysis
+
+         if processing_mode == ProcessingMode.EXTRACT_INFO:
+             # Validate extracted information
+             result["validation"] = self._validate_extracted_info(result, params)
+         elif processing_mode == ProcessingMode.CLASSIFY:
+             # Add confidence scoring
+             result["confidence_analysis"] = self._analyze_classification_confidence(result)
+
+         return result
+
+     def _validate_extracted_info(
+         self, result: Dict[str, Any], params: Dict[str, Any]
+     ) -> Dict[str, str]:
+         """Validate extracted information"""
+         # Placeholder for validation logic
+         return {"status": "validated", "notes": "Validation completed"}
+
+     def _analyze_classification_confidence(self, result: Dict[str, Any]) -> Dict[str, Any]:
+         """Analyze classification confidence"""
+         # Placeholder for confidence analysis
+         return {
+             "overall_confidence": 0.85,
+             "factors": ["content_quality", "model_certainty"],
+         }
+
+     # Utility methods for custom processing
+     def create_custom_processor(self, system_prompt: str, user_prompt_template: str) -> Callable:
+         """Create a custom processing function"""
+
+         def custom_processor(source: str, **kwargs) -> Dict[str, Any]:
+             # Add custom template
+             self.processing_templates[ProcessingMode.CUSTOM] = {
+                 "system_prompt": system_prompt,
+                 "user_prompt_template": user_prompt_template,
+             }
+
+             return self.process_document(
+                 source=source,
+                 processing_mode=ProcessingMode.CUSTOM,
+                 processing_params=kwargs,
+             )
+
+         return custom_processor
+
+     def get_processing_stats(self) -> Dict[str, Any]:
+         """Get processing statistics"""
+         # Placeholder for statistics tracking
+         return {
+             "total_documents_processed": 0,
+             "average_processing_time": 0,
+             "success_rate": 1.0,
+             "most_common_document_types": [],
+             "ai_provider_usage": {},
+         }
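
For orientation, here is a minimal usage sketch of the orchestrator defined in the hunk above. It is not part of the published wheel; the file paths, category list, and result keys printed are illustrative, and the calls assume the default configuration shown in the Config class.

from aiecs.tools.docs.ai_document_orchestrator import AIDocumentOrchestrator, ProcessingMode

# Instantiate with defaults; a dict of overrides (e.g. {"max_chunk_size": 8000}) may also be passed
orchestrator = AIDocumentOrchestrator()

# Summarize a single document (path is illustrative)
summary = orchestrator.process_document(
    source="reports/q3_report.pdf",
    processing_mode=ProcessingMode.SUMMARIZE,
)
print(summary["ai_result"]["ai_response"])

# Classify several documents in batch with bounded concurrency;
# "categories" fills the {categories} placeholder in the CLASSIFY template
batch = orchestrator.batch_process_documents(
    sources=["contracts/a.docx", "invoices/b.pdf"],
    processing_mode=ProcessingMode.CLASSIFY,
    processing_params={"categories": "invoice, contract, memo"},
    max_concurrent=2,
)
print(f'{batch["successful_documents"]}/{batch["total_documents"]} documents processed')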