aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302)
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
aiecs/config/config.py ADDED
@@ -0,0 +1,498 @@
1
+ from pydantic import Field, ConfigDict, field_validator
2
+ from pydantic_settings import BaseSettings
3
+ from functools import lru_cache
4
+ from pathlib import Path
5
+ import logging
6
+ from typing import Literal
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ class Settings(BaseSettings):
12
+ # LLM Provider Configuration (optional until used)
13
+ openai_api_key: str = Field(default="", alias="OPENAI_API_KEY")
14
+ googleai_api_key: str = Field(default="", alias="GOOGLEAI_API_KEY")
15
+ vertex_project_id: str = Field(default="", alias="VERTEX_PROJECT_ID")
16
+ vertex_location: str = Field(default="us-central1", alias="VERTEX_LOCATION")
17
+ google_application_credentials: str = Field(default="", alias="GOOGLE_APPLICATION_CREDENTIALS")
18
+ google_api_key: str = Field(default="", alias="GOOGLE_API_KEY")
19
+ google_cse_id: str = Field(default="", alias="GOOGLE_CSE_ID")
20
+ xai_api_key: str = Field(default="", alias="XAI_API_KEY")
21
+ grok_api_key: str = Field(default="", alias="GROK_API_KEY") # Backward compatibility
22
+
23
+ # LLM Models Configuration
24
+ llm_models_config_path: str = Field(
25
+ default="",
26
+ alias="LLM_MODELS_CONFIG",
27
+ description="Path to LLM models YAML configuration file",
28
+ )
29
+
30
+ # Infrastructure Configuration (with sensible defaults)
31
+ celery_broker_url: str = Field(default="redis://localhost:6379/0", alias="CELERY_BROKER_URL")
32
+ cors_allowed_origins: str = Field(
33
+ default="http://localhost:3000,http://express-gateway:3001",
34
+ alias="CORS_ALLOWED_ORIGINS",
35
+ )
36
+
37
+ # PostgreSQL Database Configuration (with defaults)
38
+ db_host: str = Field(default="localhost", alias="DB_HOST")
39
+ db_user: str = Field(default="postgres", alias="DB_USER")
40
+ db_password: str = Field(default="", alias="DB_PASSWORD")
41
+ db_name: str = Field(default="aiecs", alias="DB_NAME")
42
+ db_port: int = Field(default=5432, alias="DB_PORT")
43
+ postgres_url: str = Field(default="", alias="POSTGRES_URL")
44
+ # Connection mode: "local" (use individual parameters) or "cloud" (use POSTGRES_URL)
45
+ # If "cloud" is set, POSTGRES_URL will be used; otherwise individual
46
+ # parameters are used
47
+ db_connection_mode: str = Field(default="local", alias="DB_CONNECTION_MODE")
48
+
49
+ # Google Cloud Storage Configuration (optional)
50
+ google_cloud_project_id: str = Field(default="", alias="GOOGLE_CLOUD_PROJECT_ID")
51
+ google_cloud_storage_bucket: str = Field(default="", alias="GOOGLE_CLOUD_STORAGE_BUCKET")
52
+
53
+ # Qdrant configuration (legacy)
54
+ qdrant_url: str = Field("http://qdrant:6333", alias="QDRANT_URL")
55
+ qdrant_collection: str = Field("documents", alias="QDRANT_COLLECTION")
56
+
57
+ # Vertex AI Vector Search configuration
58
+ vertex_index_id: str | None = Field(default=None, alias="VERTEX_INDEX_ID")
59
+ vertex_endpoint_id: str | None = Field(default=None, alias="VERTEX_ENDPOINT_ID")
60
+ vertex_deployed_index_id: str | None = Field(default=None, alias="VERTEX_DEPLOYED_INDEX_ID")
61
+
62
+ # Vector store backend selection (Qdrant deprecated, using Vertex AI by
63
+ # default)
64
+ vector_store_backend: str = Field(
65
+ "vertex", alias="VECTOR_STORE_BACKEND"
66
+ ) # "vertex" (qdrant deprecated)
67
+
68
+ # Development/Server Configuration
69
+ reload: bool = Field(default=False, alias="RELOAD")
70
+ port: int = Field(default=8000, alias="PORT")
71
+
72
+ # Knowledge Graph Configuration
73
+ # Storage backend selection
74
+ kg_storage_backend: Literal["inmemory", "sqlite", "postgresql"] = Field(
75
+ default="inmemory",
76
+ alias="KG_STORAGE_BACKEND",
77
+ description="Knowledge graph storage backend: inmemory (default), sqlite (file-based), or postgresql (production)",
78
+ )
79
+
80
+ # SQLite configuration (for file-based persistence)
81
+ kg_sqlite_db_path: str = Field(
82
+ default="./storage/knowledge_graph.db",
83
+ alias="KG_SQLITE_DB_PATH",
84
+ description="Path to SQLite database file for knowledge graph storage",
85
+ )
86
+
87
+ # PostgreSQL configuration (uses main database config by default)
88
+ # If you want a separate database for knowledge graph, set these:
89
+ kg_db_host: str = Field(default="", alias="KG_DB_HOST")
90
+ kg_db_port: int = Field(default=5432, alias="KG_DB_PORT")
91
+ kg_db_user: str = Field(default="", alias="KG_DB_USER")
92
+ kg_db_password: str = Field(default="", alias="KG_DB_PASSWORD")
93
+ kg_db_name: str = Field(default="", alias="KG_DB_NAME")
94
+ kg_postgres_url: str = Field(default="", alias="KG_POSTGRES_URL")
95
+
96
+ # PostgreSQL connection pool settings
97
+ kg_min_pool_size: int = Field(
98
+ default=5,
99
+ alias="KG_MIN_POOL_SIZE",
100
+ description="Minimum number of connections in PostgreSQL pool",
101
+ )
102
+ kg_max_pool_size: int = Field(
103
+ default=20,
104
+ alias="KG_MAX_POOL_SIZE",
105
+ description="Maximum number of connections in PostgreSQL pool",
106
+ )
107
+
108
+ # PostgreSQL pgvector support
109
+ kg_enable_pgvector: bool = Field(
110
+ default=False,
111
+ alias="KG_ENABLE_PGVECTOR",
112
+ description="Enable pgvector extension for optimized vector search (requires pgvector installed)",
113
+ )
114
+
115
+ # In-memory configuration
116
+ kg_inmemory_max_nodes: int = Field(
117
+ default=100000,
118
+ alias="KG_INMEMORY_MAX_NODES",
119
+ description="Maximum number of nodes for in-memory storage",
120
+ )
121
+
122
+ # Vector search configuration
123
+ kg_vector_dimension: int = Field(
124
+ default=1536,
125
+ alias="KG_VECTOR_DIMENSION",
126
+ description="Dimension of embedding vectors (default 1536 for OpenAI ada-002)",
127
+ )
128
+
129
+ # Query configuration
130
+ kg_default_search_limit: int = Field(
131
+ default=10,
132
+ alias="KG_DEFAULT_SEARCH_LIMIT",
133
+ description="Default number of results to return in searches",
134
+ )
135
+
136
+ kg_max_traversal_depth: int = Field(
137
+ default=5,
138
+ alias="KG_MAX_TRAVERSAL_DEPTH",
139
+ description="Maximum depth for graph traversal queries",
140
+ )
141
+
142
+ # Cache configuration
143
+ kg_enable_query_cache: bool = Field(
144
+ default=True,
145
+ alias="KG_ENABLE_QUERY_CACHE",
146
+ description="Enable caching of query results",
147
+ )
148
+
149
+ kg_cache_ttl_seconds: int = Field(
150
+ default=300,
151
+ alias="KG_CACHE_TTL_SECONDS",
152
+ description="Time-to-live for cached query results (seconds)",
153
+ )
154
+
155
+ # Feature flags for new capabilities
156
+ kg_enable_runnable_pattern: bool = Field(
157
+ default=True,
158
+ alias="KG_ENABLE_RUNNABLE_PATTERN",
159
+ description="Enable Runnable pattern for composable graph operations",
160
+ )
161
+
162
+ kg_enable_knowledge_fusion: bool = Field(
163
+ default=True,
164
+ alias="KG_ENABLE_KNOWLEDGE_FUSION",
165
+ description="Enable knowledge fusion for cross-document entity merging",
166
+ )
167
+
168
+ kg_enable_reranking: bool = Field(
169
+ default=True,
170
+ alias="KG_ENABLE_RERANKING",
171
+ description="Enable result reranking for improved search relevance",
172
+ )
173
+
174
+ kg_enable_logical_queries: bool = Field(
175
+ default=True,
176
+ alias="KG_ENABLE_LOGICAL_QUERIES",
177
+ description="Enable logical query parsing for structured queries",
178
+ )
179
+
180
+ kg_enable_structured_import: bool = Field(
181
+ default=True,
182
+ alias="KG_ENABLE_STRUCTURED_IMPORT",
183
+ description="Enable structured data import (CSV/JSON)",
184
+ )
185
+
186
+ # Knowledge Fusion configuration
187
+ kg_fusion_similarity_threshold: float = Field(
188
+ default=0.85,
189
+ alias="KG_FUSION_SIMILARITY_THRESHOLD",
190
+ description="Similarity threshold for entity fusion (0.0-1.0)",
191
+ )
192
+
193
+ kg_fusion_conflict_resolution: str = Field(
194
+ default="most_complete",
195
+ alias="KG_FUSION_CONFLICT_RESOLUTION",
196
+ description="Conflict resolution strategy: most_complete, most_recent, most_confident, longest, keep_all",
197
+ )
198
+
199
+ # Reranking configuration
200
+ kg_reranking_default_strategy: str = Field(
201
+ default="hybrid",
202
+ alias="KG_RERANKING_DEFAULT_STRATEGY",
203
+ description="Default reranking strategy: text, semantic, structural, hybrid",
204
+ )
205
+
206
+ kg_reranking_top_k: int = Field(
207
+ default=100,
208
+ alias="KG_RERANKING_TOP_K",
209
+ description="Top-K results to fetch before reranking",
210
+ )
211
+
212
+ # Schema cache configuration
213
+ kg_enable_schema_cache: bool = Field(
214
+ default=True,
215
+ alias="KG_ENABLE_SCHEMA_CACHE",
216
+ description="Enable schema caching for improved performance",
217
+ )
218
+
219
+ kg_schema_cache_ttl_seconds: int = Field(
220
+ default=3600,
221
+ alias="KG_SCHEMA_CACHE_TTL_SECONDS",
222
+ description="Time-to-live for cached schemas (seconds)",
223
+ )
224
+
225
+ # Query optimization configuration
226
+ kg_enable_query_optimization: bool = Field(
227
+ default=True,
228
+ alias="KG_ENABLE_QUERY_OPTIMIZATION",
229
+ description="Enable query optimization for better performance",
230
+ )
231
+
232
+ kg_query_optimization_strategy: str = Field(
233
+ default="balanced",
234
+ alias="KG_QUERY_OPTIMIZATION_STRATEGY",
235
+ description="Query optimization strategy: cost, latency, balanced",
236
+ )
237
+
238
+ model_config = ConfigDict(env_file=".env", env_file_encoding="utf-8", extra="allow")
239
+
240
@property
def database_config(self) -> dict:
    """
    Get database configuration for asyncpg.

    The connection mode is controlled by DB_CONNECTION_MODE:
    - "cloud": return ``{"dsn": POSTGRES_URL}`` when POSTGRES_URL is set.
    - any other value (default "local"): return the individual parameters.

    If DB_CONNECTION_MODE is "cloud" but POSTGRES_URL is empty, a warning is
    logged and the individual parameters are returned instead.

    Returns:
        Either a dict with the single key "dsn", or a dict with the keys
        "host", "user", "password", "database" and "port".
    """
    # Strip before comparing so a value such as "cloud " (trailing space
    # in the environment value) still selects cloud mode.
    cloud_mode = self.db_connection_mode.strip().lower() == "cloud"

    if cloud_mode and self.postgres_url:
        return {"dsn": self.postgres_url}

    if cloud_mode:
        logger.warning(
            "DB_CONNECTION_MODE is set to 'cloud' but POSTGRES_URL is not provided. "
            "Falling back to individual parameters (local mode)."
        )

    # Single definition of the individual-parameter form, shared by local
    # mode and by the cloud-mode fallback.
    return {
        "host": self.db_host,
        "user": self.db_user,
        "password": self.db_password,
        "database": self.db_name,
        "port": self.db_port,
    }
280
+
281
@property
def file_storage_config(self) -> dict:
    """
    Build the file storage configuration dictionary.

    Returns:
        A dict with the Google Cloud Storage project id, bucket name and
        credentials path read from this settings object, plus a fixed
        local-fallback flag (True) and local storage path ("./storage").
    """
    storage_options = {
        "gcs_project_id": self.google_cloud_project_id,
        "gcs_bucket_name": self.google_cloud_storage_bucket,
        "gcs_credentials_path": self.google_application_credentials,
    }
    # The local fallback values are constants here, not settings fields.
    storage_options["enable_local_fallback"] = True
    storage_options["local_storage_path"] = "./storage"
    return storage_options
291
+
292
@property
def kg_database_config(self) -> dict:
    """
    Get knowledge graph database configuration.

    The shape of the result depends on ``kg_storage_backend``:
    - "postgresql": connection parameters plus pool/pgvector options. The
      connection part is taken from ``kg_postgres_url`` if set, else from
      the ``kg_db_*`` fields if ``kg_db_host`` is set, else from a copy of
      ``database_config``.
    - "sqlite": ``{"db_path": ...}``.
    - anything else (in-memory): ``{"max_nodes": ...}``.
    """
    backend = self.kg_storage_backend

    if backend == "sqlite":
        return {"db_path": self.kg_sqlite_db_path}
    if backend != "postgresql":
        # Remaining case is the in-memory backend.
        return {"max_nodes": self.kg_inmemory_max_nodes}

    # Options appended to every PostgreSQL variant below.
    pool_options = {
        "min_pool_size": self.kg_min_pool_size,
        "max_pool_size": self.kg_max_pool_size,
        "enable_pgvector": self.kg_enable_pgvector,
    }

    if self.kg_postgres_url:
        connection = {"dsn": self.kg_postgres_url}
    elif self.kg_db_host:
        connection = {
            "host": self.kg_db_host,
            "port": self.kg_db_port,
            "user": self.kg_db_user,
            "password": self.kg_db_password,
            "database": self.kg_db_name or "aiecs_knowledge_graph",
        }
    else:
        # No KG-specific connection info: reuse the main database config.
        connection = self.database_config.copy()

    connection.update(pool_options)
    return connection
334
+
335
@property
def kg_query_config(self) -> dict:
    """
    Collect the knowledge graph query settings into one dictionary.

    Returns:
        A dict with the keys "default_search_limit", "max_traversal_depth"
        and "vector_dimension", each read from the matching ``kg_*`` field.
    """
    query_options = {}
    query_options["default_search_limit"] = self.kg_default_search_limit
    query_options["max_traversal_depth"] = self.kg_max_traversal_depth
    query_options["vector_dimension"] = self.kg_vector_dimension
    return query_options
343
+
344
@property
def kg_cache_config(self) -> dict:
    """
    Collect the knowledge graph cache settings into one dictionary.

    Returns:
        A dict with the keys "enable_query_cache" and "cache_ttl_seconds",
        each read from the matching ``kg_*`` field.
    """
    cache_enabled = self.kg_enable_query_cache
    ttl_seconds = self.kg_cache_ttl_seconds
    return {"enable_query_cache": cache_enabled, "cache_ttl_seconds": ttl_seconds}
351
+
352
@field_validator("kg_storage_backend")
@classmethod
def validate_kg_storage_backend(cls, v: str) -> str:
    """
    Check that the knowledge graph storage backend name is supported.

    Args:
        v: Backend name supplied for ``kg_storage_backend``.

    Returns:
        ``v`` unchanged when it is one of the supported backends.

    Raises:
        ValueError: If ``v`` is not "inmemory", "sqlite" or "postgresql".
    """
    valid_backends = ["inmemory", "sqlite", "postgresql"]
    if v in valid_backends:
        return v

    supported = ", ".join(valid_backends)
    raise ValueError(
        f"Invalid KG_STORAGE_BACKEND: {v}. " f"Must be one of: {supported}"
    )
362
+
363
@field_validator("kg_sqlite_db_path")
@classmethod
def validate_kg_sqlite_path(cls, v: str) -> str:
    """
    Ensure the parent directory of the SQLite database path exists.

    Empty values and the special ":memory:" path are returned untouched.
    For any other value the parent directory is created if missing.

    Args:
        v: Path supplied for ``kg_sqlite_db_path``.

    Returns:
        ``v`` unchanged.
    """
    if not v or v == ":memory:":
        return v

    parent_dir = Path(v).parent
    try:
        parent_dir.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        # This validator only sees the path, not which storage backend is
        # selected, so a directory that cannot be created is reported as a
        # warning instead of aborting settings construction with a raw
        # OSError.
        logger.warning(
            "Could not create parent directory %s for KG_SQLITE_DB_PATH: %s",
            parent_dir,
            exc,
        )
    return v
372
+
373
@field_validator("kg_max_traversal_depth")
@classmethod
def validate_kg_max_traversal_depth(cls, v: int) -> int:
    """Check the configured maximum graph traversal depth.

    Args:
        v: Candidate value for ``kg_max_traversal_depth``.

    Returns:
        ``v`` unchanged. Values above 10 are accepted but logged as a warning.

    Raises:
        ValueError: If ``v`` is smaller than 1.
    """
    below_minimum = v < 1
    if below_minimum:
        raise ValueError("KG_MAX_TRAVERSAL_DEPTH must be at least 1")

    above_recommended = v > 10
    if above_recommended:
        # Deep traversals are allowed; the operator is only warned.
        logger.warning(
            f"KG_MAX_TRAVERSAL_DEPTH is set to {v}, which may cause performance issues. "
            "Consider using a value <= 10 for production use."
        )
    return v
385
+
386
@field_validator("kg_vector_dimension")
@classmethod
def validate_kg_vector_dimension(cls, v: int) -> int:
    """Check the configured embedding vector dimension.

    Args:
        v: Candidate value for ``kg_vector_dimension``.

    Returns:
        ``v`` unchanged. A dimension outside the list of common sizes is
        accepted but logged as a warning.

    Raises:
        ValueError: If ``v`` is smaller than 1.
    """
    if v < 1:
        raise ValueError("KG_VECTOR_DIMENSION must be at least 1")

    # Kept as a list because it is interpolated verbatim into the warning text.
    common_dims = [128, 256, 384, 512, 768, 1024, 1536, 3072]
    is_common = v in common_dims
    if not is_common:
        logger.warning(
            f"KG_VECTOR_DIMENSION is set to {v}, which is not a common embedding dimension. "
            f"Common dimensions are: {common_dims}"
        )
    return v
400
+
401
def validate_llm_models_config(self) -> bool:
    """
    Validate the explicitly configured LLM models configuration file.

    Only ``llm_models_config_path`` is checked. When it is unset (or empty),
    validation always succeeds: the previous implementation probed a default
    ``llm_models.yaml`` next to this module but returned True whether or not
    it existed, so that probe has been removed as dead logic.

    Returns:
        True when no explicit path is configured, or when the configured
        path exists.

    Raises:
        FileNotFoundError: If ``llm_models_config_path`` is set but the file
            does not exist.
    """
    if not self.llm_models_config_path:
        # No explicit path: not an error here. Per the original comment, the
        # config loader is expected to locate the file itself.
        return True

    config_path = Path(self.llm_models_config_path)
    if not config_path.exists():
        raise FileNotFoundError(f"LLM models config file not found: {config_path}")
    return True
426
+
427
+
428
@lru_cache()
def get_settings():
    """Return the cached ``Settings`` instance.

    ``lru_cache`` memoizes this zero-argument function, so ``Settings()`` is
    constructed on the first call and the same object is returned afterwards
    (until the cache is cleared via ``get_settings.cache_clear()``).
    """
    cached_settings = Settings()
    return cached_settings
431
+
432
+
433
def validate_required_settings(operation_type: str = "full") -> bool:
    """
    Check that the settings needed for a given kind of operation are present.

    Args:
        operation_type: Which group of settings to check
            - "basic": no additional settings are checked
            - "llm": at least one LLM provider must be configured
            - "database": the main database password must be set
            - "storage": a bucket is required once a Google Cloud project is set
            - "knowledge_graph": the selected KG backend must be configured
            - "full": all of the checks above

    Returns:
        True when nothing required is missing.

    Raises:
        ValueError: If one or more required settings are missing; the message
            lists every missing item.
    """
    settings = get_settings()
    problems = []
    check_all = operation_type == "full"

    if check_all or operation_type == "llm":
        # One configured provider is enough; Vertex AI needs both of its values.
        provider_configured = (
            settings.openai_api_key
            or (settings.vertex_project_id and settings.google_application_credentials)
            or settings.xai_api_key
        )
        if not provider_configured:
            problems.append("At least one LLM provider (OpenAI, Vertex AI, or xAI)")

    if check_all or operation_type == "database":
        if not settings.db_password:
            problems.append("DB_PASSWORD")

    if check_all or operation_type == "storage":
        bucket_missing = not settings.google_cloud_storage_bucket
        if settings.google_cloud_project_id and bucket_missing:
            problems.append(
                "GOOGLE_CLOUD_STORAGE_BUCKET (required when GOOGLE_CLOUD_PROJECT_ID is set)"
            )

    if check_all or operation_type == "knowledge_graph":
        backend = settings.kg_storage_backend
        if backend == "postgresql":
            # Either KG-specific connection info or the main DB password will do.
            pg_configured = (
                settings.kg_postgres_url or settings.kg_db_host or settings.db_password
            )
            if not pg_configured:
                problems.append(
                    "Knowledge graph PostgreSQL configuration: "
                    "Either set KG_POSTGRES_URL, KG_DB_* parameters, or main DB_PASSWORD"
                )
        elif backend == "sqlite" and not settings.kg_sqlite_db_path:
            problems.append("KG_SQLITE_DB_PATH (required for SQLite backend)")

    if not problems:
        return True

    raise ValueError(
        f"Missing required settings for {operation_type} operation: {', '.join(problems)}\n"
        "Please check your .env file or environment variables."
    )
@@ -0,0 +1,137 @@
1
+ """
2
+ Knowledge Graph Configuration
3
+
4
+ Configuration settings for knowledge graph storage and operations.
5
+ """
6
+
7
from enum import Enum
from functools import lru_cache

from pydantic import BaseModel, Field
9
+
10
+
11
class GraphStorageBackend(str, Enum):
    """Names of the graph storage backends that can be selected.

    Members also subclass ``str``, so each one compares equal to its raw
    string value (for example ``GraphStorageBackend.SQLITE == "sqlite"``).
    """

    INMEMORY = "inmemory"
    SQLITE = "sqlite"
    POSTGRESQL = "postgresql"
17
+
18
+
19
class KnowledgeGraphConfig(BaseModel):
    """
    Knowledge Graph Configuration

    Validated configuration model for knowledge graph storage and operations.

    The class previously declared its attributes with ``Field(...)`` without
    inheriting from a pydantic model, so reading e.g. ``config.backend``
    returned the ``FieldInfo`` descriptor object instead of the default value.
    Deriving from ``BaseModel`` makes the declared defaults real and allows
    overriding them via keyword arguments.

    NOTE(review): this model does not read environment variables by itself;
    confirm how callers are expected to populate it from AIECS Settings.
    """

    # Storage backend selection
    backend: GraphStorageBackend = Field(
        default=GraphStorageBackend.INMEMORY,
        description="Graph storage backend to use",
    )

    # SQLite configuration (for file-based persistence)
    sqlite_db_path: str = Field(
        default="./storage/knowledge_graph.db",
        description="Path to SQLite database file",
    )

    # In-memory configuration
    inmemory_max_nodes: int = Field(
        default=100000,
        description="Maximum number of nodes for in-memory storage",
    )

    # Vector search configuration
    vector_dimension: int = Field(
        default=1536,
        description="Dimension of embedding vectors (default for OpenAI ada-002)",
    )

    # Query configuration
    default_search_limit: int = Field(
        default=10,
        description="Default number of results to return in searches",
    )

    max_traversal_depth: int = Field(
        default=5,
        description="Maximum depth for graph traversal queries",
    )

    # Cache configuration
    enable_query_cache: bool = Field(
        default=True,
        description="Enable caching of query results",
    )

    cache_ttl_seconds: int = Field(
        default=300,
        description="Time-to-live for cached query results (seconds)",
    )

    # Feature flags for new capabilities
    enable_runnable_pattern: bool = Field(
        default=True,
        description="Enable Runnable pattern for composable graph operations",
    )

    enable_knowledge_fusion: bool = Field(
        default=True,
        description="Enable knowledge fusion for cross-document entity merging",
    )

    enable_reranking: bool = Field(
        default=True,
        description="Enable result reranking for improved search relevance",
    )

    enable_logical_queries: bool = Field(
        default=True,
        description="Enable logical query parsing for structured queries",
    )

    enable_structured_import: bool = Field(
        default=True,
        description="Enable structured data import (CSV/JSON)",
    )

    # Knowledge Fusion configuration
    fusion_similarity_threshold: float = Field(
        default=0.85,
        description="Similarity threshold for entity fusion (0.0-1.0)",
    )

    fusion_conflict_resolution: str = Field(
        default="most_complete",
        description="Conflict resolution strategy: most_complete, most_recent, most_confident, longest, keep_all",
    )

    # Reranking configuration
    reranking_default_strategy: str = Field(
        default="hybrid",
        description="Default reranking strategy: text, semantic, structural, hybrid",
    )

    reranking_top_k: int = Field(
        default=100,
        description="Top-K results to fetch before reranking",
    )

    # Schema cache configuration
    enable_schema_cache: bool = Field(
        default=True,
        description="Enable schema caching for improved performance",
    )

    schema_cache_ttl_seconds: int = Field(
        default=3600,
        description="Time-to-live for cached schemas (seconds)",
    )

    # Query optimization configuration
    enable_query_optimization: bool = Field(
        default=True,
        description="Enable query optimization for better performance",
    )

    query_optimization_strategy: str = Field(
        default="balanced",
        description="Query optimization strategy: cost, latency, balanced",
    )
133
+
134
+
135
@lru_cache()
def get_graph_config() -> KnowledgeGraphConfig:
    """Get knowledge graph configuration singleton

    The instance is created on the first call and cached, so every caller
    receives the same object. Previously a new object was built on each call
    despite the documented singleton contract.

    Returns:
        The shared ``KnowledgeGraphConfig`` instance.
    """
    return KnowledgeGraphConfig()
@@ -0,0 +1,23 @@
1
# Maps a (mode, service) key to the class registered for it.
AI_SERVICE_REGISTRY = {}


def register_ai_service(mode: str, service: str):
    """
    Build a class decorator that stores the decorated class in
    AI_SERVICE_REGISTRY under the key ``(mode, service)``.

    The decorated class is returned unchanged. Registering a second class
    under the same key replaces the earlier entry.
    """
    registry_key = (mode, service)

    def _remember(service_cls):
        AI_SERVICE_REGISTRY[registry_key] = service_cls
        return service_cls

    return _remember
14
+
15
+
16
def get_ai_service(mode: str, service: str):
    """
    Look up the class registered in AI_SERVICE_REGISTRY for ``(mode, service)``.

    Returns:
        The registered class.

    Raises:
        ValueError: If nothing is registered under that key.
    """
    # A private sentinel distinguishes "no entry" from any stored value.
    not_found = object()
    registered = AI_SERVICE_REGISTRY.get((mode, service), not_found)
    if registered is not_found:
        raise ValueError(f"No registered service for mode '{mode}', service '{service}'")
    return registered