aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340)
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
aiecs/config/config.py CHANGED
@@ -1,19 +1,66 @@
1
- from pydantic import Field, ConfigDict
2
- from pydantic_settings import BaseSettings
1
+ """
2
+ Configuration Module for AIECS
3
+
4
+ This module provides centralized configuration management using Pydantic settings.
5
+ Configuration can be loaded from environment variables or .env files.
6
+
7
+ Knowledge Graph Multi-Tenancy Configuration:
8
+ KG_MULTI_TENANCY_ENABLED: Enable multi-tenancy support (default: False)
9
+ KG_TENANT_ISOLATION_MODE: Tenant isolation mode (default: shared_schema)
10
+ - disabled: No tenant isolation (single-tenant mode)
11
+ - shared_schema: Shared database schema with tenant_id column filtering
12
+ - separate_schema: Separate database schemas per tenant
13
+ KG_ENABLE_RLS: Enable PostgreSQL Row-Level Security for SHARED_SCHEMA mode (default: False)
14
+ KG_INMEMORY_MAX_TENANTS: Maximum tenant graphs in memory for InMemoryGraphStore (default: 100)
15
+
16
+ Example:
17
+ # Enable multi-tenancy with shared schema and RLS
18
+ export KG_MULTI_TENANCY_ENABLED=true
19
+ export KG_TENANT_ISOLATION_MODE=shared_schema
20
+ export KG_ENABLE_RLS=true
21
+ export KG_STORAGE_BACKEND=postgresql
22
+
23
+ # Use separate schemas for stronger isolation
24
+ export KG_MULTI_TENANCY_ENABLED=true
25
+ export KG_TENANT_ISOLATION_MODE=separate_schema
26
+ export KG_STORAGE_BACKEND=postgresql
27
+ """
28
+
29
+ from pydantic import Field, field_validator
30
+ from pydantic_settings import BaseSettings, SettingsConfigDict
3
31
  from functools import lru_cache
32
+ from pathlib import Path
33
+ import logging
34
+ from typing import Literal
35
+
36
+ logger = logging.getLogger(__name__)
37
+
4
38
 
5
39
  class Settings(BaseSettings):
6
40
  # LLM Provider Configuration (optional until used)
7
41
  openai_api_key: str = Field(default="", alias="OPENAI_API_KEY")
42
+ googleai_api_key: str = Field(default="", alias="GOOGLEAI_API_KEY")
8
43
  vertex_project_id: str = Field(default="", alias="VERTEX_PROJECT_ID")
9
44
  vertex_location: str = Field(default="us-central1", alias="VERTEX_LOCATION")
10
45
  google_application_credentials: str = Field(default="", alias="GOOGLE_APPLICATION_CREDENTIALS")
46
+ google_api_key: str = Field(default="", alias="GOOGLE_API_KEY")
47
+ google_cse_id: str = Field(default="", alias="GOOGLE_CSE_ID")
11
48
  xai_api_key: str = Field(default="", alias="XAI_API_KEY")
12
49
  grok_api_key: str = Field(default="", alias="GROK_API_KEY") # Backward compatibility
13
-
50
+
51
+ # LLM Models Configuration
52
+ llm_models_config_path: str = Field(
53
+ default="",
54
+ alias="LLM_MODELS_CONFIG",
55
+ description="Path to LLM models YAML configuration file",
56
+ )
57
+
14
58
  # Infrastructure Configuration (with sensible defaults)
15
59
  celery_broker_url: str = Field(default="redis://localhost:6379/0", alias="CELERY_BROKER_URL")
16
- cors_allowed_origins: str = Field(default="http://localhost:3000,http://express-gateway:3001", alias="CORS_ALLOWED_ORIGINS")
60
+ cors_allowed_origins: str = Field(
61
+ default="http://localhost:3000,http://express-gateway:3001",
62
+ alias="CORS_ALLOWED_ORIGINS",
63
+ )
17
64
 
18
65
  # PostgreSQL Database Configuration (with defaults)
19
66
  db_host: str = Field(default="localhost", alias="DB_HOST")
@@ -22,6 +69,10 @@ class Settings(BaseSettings):
22
69
  db_name: str = Field(default="aiecs", alias="DB_NAME")
23
70
  db_port: int = Field(default=5432, alias="DB_PORT")
24
71
  postgres_url: str = Field(default="", alias="POSTGRES_URL")
72
+ # Connection mode: "local" (use individual parameters) or "cloud" (use POSTGRES_URL)
73
+ # If "cloud" is set, POSTGRES_URL will be used; otherwise individual
74
+ # parameters are used
75
+ db_connection_mode: str = Field(default="local", alias="DB_CONNECTION_MODE")
25
76
 
26
77
  # Google Cloud Storage Configuration (optional)
27
78
  google_cloud_project_id: str = Field(default="", alias="GOOGLE_CLOUD_PROJECT_ID")
@@ -36,21 +87,379 @@ class Settings(BaseSettings):
36
87
  vertex_endpoint_id: str | None = Field(default=None, alias="VERTEX_ENDPOINT_ID")
37
88
  vertex_deployed_index_id: str | None = Field(default=None, alias="VERTEX_DEPLOYED_INDEX_ID")
38
89
 
39
- # Vector store backend selection (Qdrant deprecated, using Vertex AI by default)
90
+ # Vector store backend selection (Qdrant deprecated, using Vertex AI by
91
+ # default)
40
92
  vector_store_backend: str = Field("vertex", alias="VECTOR_STORE_BACKEND") # "vertex" (qdrant deprecated)
41
93
 
42
- model_config = ConfigDict(env_file=".env", env_file_encoding="utf-8")
94
+ # Development/Server Configuration
95
+ reload: bool = Field(default=False, alias="RELOAD")
96
+ port: int = Field(default=8000, alias="PORT")
97
+
98
+ # Knowledge Graph Configuration
99
+ # Storage backend selection
100
+ kg_storage_backend: Literal["inmemory", "sqlite", "postgresql"] = Field(
101
+ default="inmemory",
102
+ alias="KG_STORAGE_BACKEND",
103
+ description="Knowledge graph storage backend: inmemory (default), sqlite (file-based), or postgresql (production)",
104
+ )
105
+
106
+ # SQLite configuration (for file-based persistence)
107
+ kg_sqlite_db_path: str = Field(
108
+ default="./storage/knowledge_graph.db",
109
+ alias="KG_SQLITE_DB_PATH",
110
+ description="Path to SQLite database file for knowledge graph storage",
111
+ )
112
+
113
+ # PostgreSQL configuration (uses main database config by default)
114
+ # If you want a separate database for knowledge graph, set these:
115
+ kg_db_host: str = Field(default="", alias="KG_DB_HOST")
116
+ kg_db_port: int = Field(default=5432, alias="KG_DB_PORT")
117
+ kg_db_user: str = Field(default="", alias="KG_DB_USER")
118
+ kg_db_password: str = Field(default="", alias="KG_DB_PASSWORD")
119
+ kg_db_name: str = Field(default="", alias="KG_DB_NAME")
120
+ kg_postgres_url: str = Field(default="", alias="KG_POSTGRES_URL")
121
+
122
+ # PostgreSQL connection pool settings
123
+ kg_min_pool_size: int = Field(
124
+ default=5,
125
+ alias="KG_MIN_POOL_SIZE",
126
+ description="Minimum number of connections in PostgreSQL pool",
127
+ )
128
+ kg_max_pool_size: int = Field(
129
+ default=20,
130
+ alias="KG_MAX_POOL_SIZE",
131
+ description="Maximum number of connections in PostgreSQL pool",
132
+ )
133
+
134
+ # PostgreSQL pgvector support
135
+ kg_enable_pgvector: bool = Field(
136
+ default=False,
137
+ alias="KG_ENABLE_PGVECTOR",
138
+ description="Enable pgvector extension for optimized vector search (requires pgvector installed)",
139
+ )
140
+
141
+ # In-memory configuration
142
+ kg_inmemory_max_nodes: int = Field(
143
+ default=100000,
144
+ alias="KG_INMEMORY_MAX_NODES",
145
+ description="Maximum number of nodes for in-memory storage",
146
+ )
147
+
148
+ kg_inmemory_max_tenants: int = Field(
149
+ default=100,
150
+ alias="KG_INMEMORY_MAX_TENANTS",
151
+ description="Maximum number of tenant graphs in memory (LRU eviction)",
152
+ )
153
+
154
+ # Vector search configuration
155
+ kg_vector_dimension: int = Field(
156
+ default=1536,
157
+ alias="KG_VECTOR_DIMENSION",
158
+ description="Dimension of embedding vectors (default 1536 for OpenAI ada-002)",
159
+ )
160
+
161
+ # Query configuration
162
+ kg_default_search_limit: int = Field(
163
+ default=10,
164
+ alias="KG_DEFAULT_SEARCH_LIMIT",
165
+ description="Default number of results to return in searches",
166
+ )
167
+
168
+ kg_max_traversal_depth: int = Field(
169
+ default=5,
170
+ alias="KG_MAX_TRAVERSAL_DEPTH",
171
+ description="Maximum depth for graph traversal queries",
172
+ )
173
+
174
+ # Cache configuration
175
+ kg_enable_query_cache: bool = Field(
176
+ default=True,
177
+ alias="KG_ENABLE_QUERY_CACHE",
178
+ description="Enable caching of query results",
179
+ )
180
+
181
+ kg_cache_ttl_seconds: int = Field(
182
+ default=300,
183
+ alias="KG_CACHE_TTL_SECONDS",
184
+ description="Time-to-live for cached query results (seconds)",
185
+ )
186
+
187
+ # Entity Extraction LLM Configuration
188
+ kg_entity_extraction_llm_provider: str = Field(
189
+ default="",
190
+ alias="KG_ENTITY_EXTRACTION_LLM_PROVIDER",
191
+ description="LLM provider for entity extraction (supports custom providers registered via LLMClientFactory)",
192
+ )
193
+
194
+ kg_entity_extraction_llm_model: str = Field(
195
+ default="",
196
+ alias="KG_ENTITY_EXTRACTION_LLM_MODEL",
197
+ description="LLM model for entity extraction",
198
+ )
199
+
200
+ kg_entity_extraction_temperature: float = Field(
201
+ default=0.1,
202
+ alias="KG_ENTITY_EXTRACTION_TEMPERATURE",
203
+ description="Temperature for entity extraction (low for deterministic results)",
204
+ )
205
+
206
+ kg_entity_extraction_max_tokens: int = Field(
207
+ default=2000,
208
+ alias="KG_ENTITY_EXTRACTION_MAX_TOKENS",
209
+ description="Maximum tokens for entity extraction response",
210
+ )
211
+
212
+ # Embedding Configuration
213
+ kg_embedding_provider: str = Field(
214
+ default="openai",
215
+ alias="KG_EMBEDDING_PROVIDER",
216
+ description="LLM provider for embeddings (supports custom providers registered via LLMClientFactory)",
217
+ )
218
+
219
+ kg_embedding_model: str = Field(
220
+ default="text-embedding-ada-002",
221
+ alias="KG_EMBEDDING_MODEL",
222
+ description="Model for generating embeddings",
223
+ )
224
+
225
+ kg_embedding_dimension: int = Field(
226
+ default=1536,
227
+ alias="KG_EMBEDDING_DIMENSION",
228
+ description="Dimension of embedding vectors (must match model output, e.g., 1536 for ada-002)",
229
+ )
230
+
231
+ # Feature flags for new capabilities
232
+ kg_enable_runnable_pattern: bool = Field(
233
+ default=True,
234
+ alias="KG_ENABLE_RUNNABLE_PATTERN",
235
+ description="Enable Runnable pattern for composable graph operations",
236
+ )
237
+
238
+ kg_enable_knowledge_fusion: bool = Field(
239
+ default=True,
240
+ alias="KG_ENABLE_KNOWLEDGE_FUSION",
241
+ description="Enable knowledge fusion for cross-document entity merging",
242
+ )
243
+
244
+ kg_enable_reranking: bool = Field(
245
+ default=True,
246
+ alias="KG_ENABLE_RERANKING",
247
+ description="Enable result reranking for improved search relevance",
248
+ )
249
+
250
+ kg_enable_logical_queries: bool = Field(
251
+ default=True,
252
+ alias="KG_ENABLE_LOGICAL_QUERIES",
253
+ description="Enable logical query parsing for structured queries",
254
+ )
255
+
256
+ kg_enable_structured_import: bool = Field(
257
+ default=True,
258
+ alias="KG_ENABLE_STRUCTURED_IMPORT",
259
+ description="Enable structured data import (CSV/JSON)",
260
+ )
261
+
262
+ # Knowledge Fusion configuration
263
+ kg_fusion_similarity_threshold: float = Field(
264
+ default=0.85,
265
+ alias="KG_FUSION_SIMILARITY_THRESHOLD",
266
+ description="Similarity threshold for entity fusion (0.0-1.0)",
267
+ )
268
+
269
+ kg_fusion_conflict_resolution: str = Field(
270
+ default="most_complete",
271
+ alias="KG_FUSION_CONFLICT_RESOLUTION",
272
+ description="Conflict resolution strategy: most_complete, most_recent, most_confident, longest, keep_all",
273
+ )
274
+
275
+ # Knowledge Fusion Matching Pipeline Configuration
276
+ # Threshold for alias-based matching (O(1) lookup via AliasIndex)
277
+ kg_fusion_alias_match_score: float = Field(
278
+ default=0.98,
279
+ alias="KG_FUSION_ALIAS_MATCH_SCORE",
280
+ description="Minimum score for alias-based matching (0.0-1.0, default: 0.98)",
281
+ )
282
+
283
+ # Threshold for abbreviation/acronym matching
284
+ kg_fusion_abbreviation_match_score: float = Field(
285
+ default=0.95,
286
+ alias="KG_FUSION_ABBREVIATION_MATCH_SCORE",
287
+ description="Minimum score for abbreviation matching (0.0-1.0, default: 0.95)",
288
+ )
289
+
290
+ # Threshold for normalized name matching
291
+ kg_fusion_normalization_match_score: float = Field(
292
+ default=0.90,
293
+ alias="KG_FUSION_NORMALIZATION_MATCH_SCORE",
294
+ description="Minimum score for normalized name matching (0.0-1.0, default: 0.90)",
295
+ )
296
+
297
+ # Threshold for semantic embedding matching
298
+ kg_fusion_semantic_threshold: float = Field(
299
+ default=0.85,
300
+ alias="KG_FUSION_SEMANTIC_THRESHOLD",
301
+ description="Minimum score for semantic embedding matching (0.0-1.0, default: 0.85)",
302
+ )
303
+
304
+ # Threshold for string similarity matching (fallback)
305
+ kg_fusion_string_similarity_threshold: float = Field(
306
+ default=0.80,
307
+ alias="KG_FUSION_STRING_SIMILARITY_THRESHOLD",
308
+ description="Minimum score for string similarity matching (0.0-1.0, default: 0.80)",
309
+ )
310
+
311
+ # Enable/disable semantic matching globally
312
+ kg_fusion_semantic_enabled: bool = Field(
313
+ default=True,
314
+ alias="KG_FUSION_SEMANTIC_ENABLED",
315
+ description="Enable semantic embedding matching (requires LLM provider)",
316
+ )
317
+
318
+ # Default enabled matching stages
319
+ kg_fusion_enabled_stages: str = Field(
320
+ default="exact,alias,abbreviation,normalized,semantic,string",
321
+ alias="KG_FUSION_ENABLED_STAGES",
322
+ description="Comma-separated list of enabled matching stages: exact,alias,abbreviation,normalized,semantic,string",
323
+ )
324
+
325
+ # Early exit threshold for pipeline optimization
326
+ kg_fusion_early_exit_threshold: float = Field(
327
+ default=0.95,
328
+ alias="KG_FUSION_EARLY_EXIT_THRESHOLD",
329
+ description="Skip remaining stages if match score >= this threshold (0.0-1.0)",
330
+ )
331
+
332
+ # AliasIndex backend configuration
333
+ kg_fusion_alias_backend: str = Field(
334
+ default="memory",
335
+ alias="KG_FUSION_ALIAS_BACKEND",
336
+ description="AliasIndex backend: memory (default for small graphs) or redis (for large/distributed)",
337
+ )
338
+
339
+ # Redis URL for AliasIndex (when backend is redis)
340
+ kg_fusion_alias_redis_url: str = Field(
341
+ default="redis://localhost:6379/1",
342
+ alias="KG_FUSION_ALIAS_REDIS_URL",
343
+ description="Redis URL for AliasIndex when using redis backend",
344
+ )
345
+
346
+ # Threshold for auto-switching from memory to Redis backend
347
+ kg_fusion_alias_redis_threshold: int = Field(
348
+ default=100000,
349
+ alias="KG_FUSION_ALIAS_REDIS_THRESHOLD",
350
+ description="Number of aliases before auto-switching to Redis backend",
351
+ )
352
+
353
+ # Path to per-entity-type configuration file (JSON or YAML)
354
+ kg_fusion_entity_type_config_path: str = Field(
355
+ default="",
356
+ alias="KG_FUSION_ENTITY_TYPE_CONFIG_PATH",
357
+ description="Path to JSON/YAML file with per-entity-type matching configuration",
358
+ )
359
+
360
+ # Reranking configuration
361
+ kg_reranking_default_strategy: str = Field(
362
+ default="hybrid",
363
+ alias="KG_RERANKING_DEFAULT_STRATEGY",
364
+ description="Default reranking strategy: text, semantic, structural, hybrid",
365
+ )
366
+
367
+ kg_reranking_top_k: int = Field(
368
+ default=100,
369
+ alias="KG_RERANKING_TOP_K",
370
+ description="Top-K results to fetch before reranking",
371
+ )
372
+
373
+ # Schema cache configuration
374
+ kg_enable_schema_cache: bool = Field(
375
+ default=True,
376
+ alias="KG_ENABLE_SCHEMA_CACHE",
377
+ description="Enable schema caching for improved performance",
378
+ )
379
+
380
+ kg_schema_cache_ttl_seconds: int = Field(
381
+ default=3600,
382
+ alias="KG_SCHEMA_CACHE_TTL_SECONDS",
383
+ description="Time-to-live for cached schemas (seconds)",
384
+ )
385
+
386
+ # Query optimization configuration
387
+ kg_enable_query_optimization: bool = Field(
388
+ default=True,
389
+ alias="KG_ENABLE_QUERY_OPTIMIZATION",
390
+ description="Enable query optimization for better performance",
391
+ )
392
+
393
+ kg_query_optimization_strategy: str = Field(
394
+ default="balanced",
395
+ alias="KG_QUERY_OPTIMIZATION_STRATEGY",
396
+ description="Query optimization strategy: cost, latency, balanced",
397
+ )
398
+
399
+ # Multi-tenancy configuration
400
+ kg_multi_tenancy_enabled: bool = Field(
401
+ default=False,
402
+ alias="KG_MULTI_TENANCY_ENABLED",
403
+ description="Enable multi-tenancy support for knowledge graph",
404
+ )
405
+
406
+ kg_tenant_isolation_mode: str = Field(
407
+ default="shared_schema",
408
+ alias="KG_TENANT_ISOLATION_MODE",
409
+ description="Tenant isolation mode: disabled, shared_schema, separate_schema",
410
+ )
411
+
412
+ kg_enable_rls: bool = Field(
413
+ default=False,
414
+ alias="KG_ENABLE_RLS",
415
+ description="Enable Row-Level Security for PostgreSQL (SHARED_SCHEMA mode only)",
416
+ )
417
+
418
+ kg_inmemory_max_tenants: int = Field(
419
+ default=100,
420
+ alias="KG_INMEMORY_MAX_TENANTS",
421
+ description="Maximum number of tenant graphs in memory (for InMemoryGraphStore LRU)",
422
+ )
423
+
424
+ model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="allow")
43
425
 
44
426
  @property
45
427
  def database_config(self) -> dict:
46
- """Get database configuration for asyncpg"""
47
- return {
48
- "host": self.db_host,
49
- "user": self.db_user,
50
- "password": self.db_password,
51
- "database": self.db_name,
52
- "port": self.db_port
53
- }
428
+ """
429
+ Get database configuration for asyncpg.
430
+
431
+ Supports both connection string (POSTGRES_URL) and individual parameters.
432
+ The connection mode is controlled by DB_CONNECTION_MODE:
433
+ - "cloud": Use POSTGRES_URL connection string (for cloud databases)
434
+ - "local": Use individual parameters (for local databases)
435
+
436
+ If DB_CONNECTION_MODE is "cloud" but POSTGRES_URL is not provided,
437
+ falls back to individual parameters with a warning.
438
+ """
439
+ # Check connection mode
440
+ if self.db_connection_mode.lower() == "cloud":
441
+ # Use connection string for cloud databases
442
+ if self.postgres_url:
443
+ return {"dsn": self.postgres_url}
444
+ else:
445
+ logger.warning("DB_CONNECTION_MODE is set to 'cloud' but POSTGRES_URL is not provided. " "Falling back to individual parameters (local mode).")
446
+ # Fall back to individual parameters
447
+ return {
448
+ "host": self.db_host,
449
+ "user": self.db_user,
450
+ "password": self.db_password,
451
+ "database": self.db_name,
452
+ "port": self.db_port,
453
+ }
454
+ else:
455
+ # Use individual parameters for local databases (default)
456
+ return {
457
+ "host": self.db_host,
458
+ "user": self.db_user,
459
+ "password": self.db_password,
460
+ "database": self.db_name,
461
+ "port": self.db_port,
462
+ }
54
463
 
55
464
  @property
56
465
  def file_storage_config(self) -> dict:
@@ -60,9 +469,320 @@ class Settings(BaseSettings):
60
469
  "gcs_bucket_name": self.google_cloud_storage_bucket,
61
470
  "gcs_credentials_path": self.google_application_credentials,
62
471
  "enable_local_fallback": True,
63
- "local_storage_path": "./storage"
472
+ "local_storage_path": "./storage",
64
473
  }
65
474
 
475
def validate_multi_tenancy_config(self) -> bool:
    """
    Check that the multi-tenancy settings agree with each other.

    Two soft checks only log a warning: RLS enabled while the storage
    backend is not "postgresql", and RLS enabled while the isolation mode
    is not "shared_schema". One hard check raises: multi-tenancy enabled
    while the isolation mode is "disabled".

    Returns:
        True when no hard inconsistency was found

    Raises:
        ValueError: If multi-tenancy is enabled with isolation mode "disabled"
    """
    rls_requested = self.kg_enable_rls

    # Soft checks: RLS is only meaningful for PostgreSQL in shared_schema mode.
    if rls_requested and self.kg_storage_backend != "postgresql":
        logger.warning(
            "KG_ENABLE_RLS is enabled but storage backend is not PostgreSQL. "
            "RLS will have no effect."
        )
    if rls_requested and self.kg_tenant_isolation_mode != "shared_schema":
        logger.warning(
            "KG_ENABLE_RLS is enabled but isolation mode is not 'shared_schema'. "
            "RLS is only applicable to shared_schema mode."
        )

    # Hard check: tenancy cannot be on while isolation is switched off.
    isolation_off = self.kg_multi_tenancy_enabled and self.kg_tenant_isolation_mode == "disabled"
    if isolation_off:
        raise ValueError(
            "KG_MULTI_TENANCY_ENABLED is True but KG_TENANT_ISOLATION_MODE is 'disabled'. "
            "Please set KG_TENANT_ISOLATION_MODE to 'shared_schema' or 'separate_schema'."
        )

    return True
507
+
508
@property
def kg_database_config(self) -> dict:
    """
    Get knowledge graph database configuration.

    Returns configuration for the knowledge graph storage backend:
    - "postgresql": connection parameters plus pool/pgvector options.
      KG_POSTGRES_URL wins, then the KG_DB_* parameters (when kg_db_host
      is set), otherwise the main database_config is reused.
    - "sqlite": {"db_path": ...}
    - anything else (in-memory): {"max_nodes": ...}
    """
    backend = self.kg_storage_backend

    if backend == "sqlite":
        return {"db_path": self.kg_sqlite_db_path}
    if backend != "postgresql":
        # In-memory backend
        return {"max_nodes": self.kg_inmemory_max_nodes}

    # Pool and pgvector options are identical for every PostgreSQL variant,
    # so build them once and append them to whichever connection info applies.
    pool_options = {
        "min_pool_size": self.kg_min_pool_size,
        "max_pool_size": self.kg_max_pool_size,
        "enable_pgvector": self.kg_enable_pgvector,
    }

    if self.kg_postgres_url:
        return {"dsn": self.kg_postgres_url, **pool_options}

    if self.kg_db_host:
        return {
            "host": self.kg_db_host,
            "port": self.kg_db_port,
            "user": self.kg_db_user,
            "password": self.kg_db_password,
            "database": self.kg_db_name or "aiecs_knowledge_graph",
            **pool_options,
        }

    # Fall back to the main database config; merging into a new dict leaves
    # the dict returned by database_config untouched.
    return {**self.database_config, **pool_options}
550
+
551
@property
def kg_query_config(self) -> dict:
    """
    Get knowledge graph query configuration.

    Returns:
        Dictionary with the keys "default_search_limit",
        "max_traversal_depth" and "vector_dimension", each taken from the
        kg_-prefixed setting of the same name.
    """
    query_settings = dict(
        default_search_limit=self.kg_default_search_limit,
        max_traversal_depth=self.kg_max_traversal_depth,
        vector_dimension=self.kg_vector_dimension,
    )
    return query_settings
559
+
560
@property
def kg_cache_config(self) -> dict:
    """
    Get knowledge graph cache configuration.

    Returns:
        Dictionary with "enable_query_cache" and "cache_ttl_seconds",
        taken from the kg_-prefixed settings of the same name.
    """
    cache_settings = dict(
        enable_query_cache=self.kg_enable_query_cache,
        cache_ttl_seconds=self.kg_cache_ttl_seconds,
    )
    return cache_settings
567
+
568
@property
def kg_multi_tenancy_config(self) -> dict:
    """
    Get knowledge graph multi-tenancy configuration.

    Returns:
        Dictionary with multi-tenancy settings:
        - enabled: value of kg_multi_tenancy_enabled
        - isolation_mode: value of kg_tenant_isolation_mode
          (disabled/shared_schema/separate_schema)
        - enable_rls: value of kg_enable_rls
        - max_tenants: value of kg_inmemory_max_tenants
    """
    tenancy_settings = dict(
        enabled=self.kg_multi_tenancy_enabled,
        isolation_mode=self.kg_tenant_isolation_mode,
        enable_rls=self.kg_enable_rls,
        max_tenants=self.kg_inmemory_max_tenants,
    )
    return tenancy_settings
586
+
587
@field_validator("kg_storage_backend")
@classmethod
def validate_kg_storage_backend(cls, v: str) -> str:
    """
    Accept only a supported knowledge graph storage backend.

    Returns:
        The value unchanged when it is "inmemory", "sqlite" or "postgresql"

    Raises:
        ValueError: For any other value
    """
    valid_backends = ("inmemory", "sqlite", "postgresql")
    if v in valid_backends:
        return v
    raise ValueError(f"Invalid KG_STORAGE_BACKEND: {v}. " f"Must be one of: {', '.join(valid_backends)}")
595
+
596
@field_validator("kg_sqlite_db_path")
@classmethod
def validate_kg_sqlite_path(cls, v: str) -> str:
    """
    Make sure the directory that will hold the SQLite database exists.

    An empty value and the special ":memory:" value are returned as-is.
    For any other value the parent directory is created (including missing
    ancestors) as a side effect of validation.

    Returns:
        The path value unchanged
    """
    # Nothing to prepare on disk for an unset path or an in-memory database.
    if not v or v == ":memory:":
        return v

    Path(v).parent.mkdir(parents=True, exist_ok=True)
    return v
605
+
606
@field_validator("kg_max_traversal_depth")
@classmethod
def validate_kg_max_traversal_depth(cls, v: int) -> int:
    """
    Validate the maximum graph traversal depth.

    Values below 1 are rejected; values above 10 are accepted but logged
    as a potential performance problem.

    Returns:
        The depth unchanged

    Raises:
        ValueError: If the depth is less than 1
    """
    too_shallow = v < 1
    if too_shallow:
        raise ValueError("KG_MAX_TRAVERSAL_DEPTH must be at least 1")

    suspiciously_deep = v > 10
    if suspiciously_deep:
        logger.warning(f"KG_MAX_TRAVERSAL_DEPTH is set to {v}, which may cause performance issues. " "Consider using a value <= 10 for production use.")

    return v
615
+
616
@field_validator("kg_vector_dimension")
@classmethod
def validate_kg_vector_dimension(cls, v: int) -> int:
    """
    Validate the embedding vector dimension.

    Values below 1 are rejected. A value that is not one of the commonly
    used dimensions is still accepted, but a warning is logged.

    Returns:
        The dimension unchanged

    Raises:
        ValueError: If the dimension is less than 1
    """
    if v < 1:
        raise ValueError("KG_VECTOR_DIMENSION must be at least 1")

    # Kept as a list because its repr is embedded in the warning text.
    common_dims = [128, 256, 384, 512, 768, 1024, 1536, 3072]
    is_common = v in common_dims
    if not is_common:
        logger.warning(f"KG_VECTOR_DIMENSION is set to {v}, which is not a common embedding dimension. " f"Common dimensions are: {common_dims}")

    return v
627
+
628
@field_validator(
    # The general fusion threshold is documented as "0.0-1.0" like the
    # stage-specific ones, so it is range-checked together with them.
    "kg_fusion_similarity_threshold",
    "kg_fusion_alias_match_score",
    "kg_fusion_abbreviation_match_score",
    "kg_fusion_normalization_match_score",
    "kg_fusion_semantic_threshold",
    "kg_fusion_string_similarity_threshold",
    "kg_fusion_early_exit_threshold",
)
@classmethod
def validate_fusion_thresholds(cls, v: float) -> float:
    """
    Validate that a fusion matching threshold lies in the range [0.0, 1.0].

    Applied to every knowledge-fusion score/threshold field listed in the
    decorator above.

    Returns:
        The threshold unchanged

    Raises:
        ValueError: If the value is outside [0.0, 1.0] (NaN also fails the
            chained comparison and is rejected)
    """
    if not 0.0 <= v <= 1.0:
        raise ValueError(f"Fusion threshold must be between 0.0 and 1.0, got {v}")
    return v
642
+
643
@field_validator("kg_fusion_alias_backend")
@classmethod
def validate_fusion_alias_backend(cls, v: str) -> str:
    """
    Accept only a supported AliasIndex backend.

    Returns:
        The value unchanged when it is "memory" or "redis"

    Raises:
        ValueError: For any other value
    """
    valid_backends = ("memory", "redis")
    if v in valid_backends:
        return v
    raise ValueError(f"Invalid KG_FUSION_ALIAS_BACKEND: {v}. Must be one of: {', '.join(valid_backends)}")
651
+
652
@field_validator("kg_fusion_enabled_stages")
@classmethod
def validate_fusion_enabled_stages(cls, v: str) -> str:
    """
    Validate the comma-separated list of enabled matching stages.

    Each entry is stripped of surrounding whitespace and empty entries are
    ignored before the names are checked against the known stages.

    Returns:
        The original string unchanged

    Raises:
        ValueError: If any entry is not a known stage name
    """
    valid_stages = {"exact", "alias", "abbreviation", "normalized", "semantic", "string"}

    requested = []
    for raw_name in v.split(","):
        name = raw_name.strip()
        if name:
            requested.append(name)

    invalid = set(requested).difference(valid_stages)
    if invalid:
        raise ValueError(f"Invalid matching stages: {invalid}. Valid stages are: {valid_stages}")
    return v
662
+
663
@field_validator("kg_tenant_isolation_mode")
@classmethod
def validate_tenant_isolation_mode(cls, v: str) -> str:
    """
    Accept only a supported tenant isolation mode.

    Returns:
        The value unchanged when it is "disabled", "shared_schema" or
        "separate_schema"

    Raises:
        ValueError: For any other value
    """
    valid_modes = ("disabled", "shared_schema", "separate_schema")
    if v in valid_modes:
        return v
    raise ValueError(
        f"Invalid KG_TENANT_ISOLATION_MODE: {v}. "
        f"Must be one of: {', '.join(valid_modes)}"
    )
674
+
675
@field_validator("kg_enable_rls")
@classmethod
def validate_enable_rls(cls, v: bool, info) -> bool:
    """
    Pass the RLS flag through unchanged.

    No check is performed here: the other fields are not reliably
    available while this validator runs, so checks that involve more than
    one field are left to a separate method.
    """
    # `info` is accepted but intentionally unused.
    return v
682
+
683
def validate_llm_models_config(self) -> bool:
    """
    Validate an explicitly configured LLM models configuration file.

    Only llm_models_config_path is checked. When it is empty nothing is
    verified here and the call succeeds, because the config loader does
    its own lookup of default locations.

    Returns:
        True if no path is configured or the configured path exists

    Raises:
        FileNotFoundError: If llm_models_config_path is set but the file
            does not exist
    """
    if self.llm_models_config_path:
        config_path = Path(self.llm_models_config_path)
        if not config_path.exists():
            raise FileNotFoundError(f"LLM models config file not found: {config_path}")

    # The former probe of the bundled llm_models.yaml returned True whether
    # or not the file existed, so it was dropped as a no-op filesystem hit.
    return True
708
+
709
def get_fusion_matching_config(self) -> "FusionMatchingConfig":
    """
    Build a FusionMatchingConfig from these Settings.

    Configuration load order:
    1. System defaults (hardcoded in FusionMatchingConfig)
    2. Global config (from Settings/environment variables)
    3. Per-entity-type config (from kg_fusion_entity_type_config_path file)
    4. Runtime overrides (can be passed to methods)

    This method covers steps 2 and 3. A missing or unreadable
    per-entity-type file is logged as a warning and otherwise ignored, so
    the global config is still returned.

    Returns:
        FusionMatchingConfig instance initialized from Settings

    Example:
        ```python
        settings = get_settings()
        config = settings.get_fusion_matching_config()
        person_config = config.get_config_for_type("Person")
        ```
    """
    # Imported lazily to avoid circular imports
    from aiecs.application.knowledge_graph.fusion.matching_config import (
        FusionMatchingConfig,
        load_matching_config,
    )

    # Split the comma-separated stage list, dropping blanks
    enabled_stages = []
    for raw_stage in self.kg_fusion_enabled_stages.split(","):
        if raw_stage.strip():
            enabled_stages.append(raw_stage.strip())

    # Global configuration taken straight from Settings
    config = FusionMatchingConfig(
        alias_match_score=self.kg_fusion_alias_match_score,
        abbreviation_match_score=self.kg_fusion_abbreviation_match_score,
        normalization_match_score=self.kg_fusion_normalization_match_score,
        semantic_threshold=self.kg_fusion_semantic_threshold,
        string_similarity_threshold=self.kg_fusion_string_similarity_threshold,
        enabled_stages=enabled_stages,
        semantic_enabled=self.kg_fusion_semantic_enabled,
    )

    logger.debug(
        f"Fusion matching config loaded from Settings: "
        f"alias={self.kg_fusion_alias_match_score}, "
        f"abbreviation={self.kg_fusion_abbreviation_match_score}, "
        f"normalization={self.kg_fusion_normalization_match_score}, "
        f"semantic={self.kg_fusion_semantic_threshold}, "
        f"string={self.kg_fusion_string_similarity_threshold}"
    )

    # No per-entity-type file configured: the global config is the result
    if not self.kg_fusion_entity_type_config_path:
        return config

    config_path = Path(self.kg_fusion_entity_type_config_path)
    if not config_path.exists():
        logger.warning(
            f"Entity type config file not found: {config_path}"
        )
        return config

    # Best effort: any failure while loading or merging is only logged
    try:
        file_config = load_matching_config(str(config_path))
        for entity_type, type_config in file_config.entity_type_configs.items():
            config.add_entity_type_config(entity_type, type_config)
        logger.info(
            f"Loaded per-entity-type config from: {config_path} "
            f"({len(file_config.entity_type_configs)} types)"
        )
    except Exception as e:
        logger.warning(
            f"Failed to load entity type config from {config_path}: {e}"
        )

    return config
784
+
785
+
66
786
  @lru_cache()
67
787
  def get_settings():
68
788
  return Settings()
@@ -71,47 +791,58 @@ def get_settings():
71
791
  def validate_required_settings(operation_type: str = "full") -> bool:
72
792
  """
73
793
  Validate that required settings are present for specific operations
74
-
794
+
75
795
  Args:
76
796
  operation_type: Type of operation to validate for
77
797
  - "basic": Only basic package functionality
78
- - "llm": LLM provider functionality
798
+ - "llm": LLM provider functionality
79
799
  - "database": Database operations
80
800
  - "storage": Cloud storage operations
801
+ - "knowledge_graph": Knowledge graph operations
81
802
  - "full": All functionality
82
-
803
+
83
804
  Returns:
84
805
  True if settings are valid, False otherwise
85
-
806
+
86
807
  Raises:
87
808
  ValueError: If required settings are missing for the operation type
88
809
  """
89
810
  settings = get_settings()
90
811
  missing = []
91
-
812
+
92
813
  if operation_type in ["llm", "full"]:
93
814
  # At least one LLM provider should be configured
94
815
  llm_configs = [
95
816
  ("OpenAI", settings.openai_api_key),
96
- ("Vertex AI", settings.vertex_project_id and settings.google_application_credentials),
97
- ("xAI", settings.xai_api_key)
817
+ (
818
+ "Vertex AI",
819
+ settings.vertex_project_id and settings.google_application_credentials,
820
+ ),
821
+ ("xAI", settings.xai_api_key),
98
822
  ]
99
-
823
+
100
824
  if not any(config[1] for config in llm_configs):
101
825
  missing.append("At least one LLM provider (OpenAI, Vertex AI, or xAI)")
102
-
826
+
103
827
  if operation_type in ["database", "full"]:
104
828
  if not settings.db_password:
105
829
  missing.append("DB_PASSWORD")
106
-
830
+
107
831
  if operation_type in ["storage", "full"]:
108
832
  if settings.google_cloud_project_id and not settings.google_cloud_storage_bucket:
109
833
  missing.append("GOOGLE_CLOUD_STORAGE_BUCKET (required when GOOGLE_CLOUD_PROJECT_ID is set)")
110
-
834
+
835
+ if operation_type in ["knowledge_graph", "full"]:
836
+ # Validate knowledge graph configuration
837
+ if settings.kg_storage_backend == "postgresql":
838
+ # Check if KG-specific or main DB config is available
839
+ if not (settings.kg_postgres_url or settings.kg_db_host or settings.db_password):
840
+ missing.append("Knowledge graph PostgreSQL configuration: " "Either set KG_POSTGRES_URL, KG_DB_* parameters, or main DB_PASSWORD")
841
+ elif settings.kg_storage_backend == "sqlite":
842
+ if not settings.kg_sqlite_db_path:
843
+ missing.append("KG_SQLITE_DB_PATH (required for SQLite backend)")
844
+
111
845
  if missing:
112
- raise ValueError(
113
- f"Missing required settings for {operation_type} operation: {', '.join(missing)}\n"
114
- "Please check your .env file or environment variables."
115
- )
116
-
846
+ raise ValueError(f"Missing required settings for {operation_type} operation: {', '.join(missing)}\n" "Please check your .env file or environment variables.")
847
+
117
848
  return True