aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1563 @@
1
+ """
2
+ PostgreSQL Graph Storage Backend
3
+
4
+ Provides production-grade graph storage using PostgreSQL with:
5
+ - Connection pooling via asyncpg
6
+ - Transaction support
7
+ - Recursive CTEs for efficient graph traversal
8
+ - Optional pgvector support for vector similarity search
9
+
10
+ Multi-tenancy Support:
11
+ - SHARED_SCHEMA mode: Single schema with tenant_id column + optional RLS
12
+ - SEPARATE_SCHEMA mode: PostgreSQL schemas per tenant (CREATE SCHEMA tenant_xxx)
13
+ - Global namespace for tenant_id=NULL (persisted as tenant_id = '' because the column is NOT NULL; backward compatible)
14
+ """
15
+
16
+ import json
17
+ import asyncpg # type: ignore[import-untyped]
18
+ import logging
19
+ from typing import Any, Dict, List, Optional, Tuple, cast
20
+ from contextlib import asynccontextmanager
21
+ import numpy as np
22
+
23
+ from aiecs.domain.knowledge_graph.models.entity import Entity
24
+ from aiecs.domain.knowledge_graph.models.relation import Relation
25
+ from aiecs.domain.knowledge_graph.models.path import Path
26
+ from aiecs.infrastructure.graph_storage.base import GraphStore
27
+ from aiecs.infrastructure.graph_storage.tenant import (
28
+ TenantContext,
29
+ TenantIsolationMode,
30
+ CrossTenantRelationError,
31
+ )
32
+ from aiecs.config.config import get_settings
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ # PostgreSQL Schema for graph storage with multi-tenancy support
38
+ # Note: For existing databases, run MIGRATION_SQL first to add tenant_id columns
39
+ # Uses empty string '' (not NULL) as the tenant_id default because PRIMARY KEY columns cannot be NULL and tenant_id is part of the composite key (id, tenant_id)
40
+ SCHEMA_SQL = """
41
+ -- Entities table with tenant_id for multi-tenancy
42
+ -- tenant_id = '' (empty string) for global namespace
43
+ CREATE TABLE IF NOT EXISTS graph_entities (
44
+ id TEXT NOT NULL,
45
+ tenant_id TEXT NOT NULL DEFAULT '', -- Empty string for global namespace
46
+ entity_type TEXT NOT NULL,
47
+ properties JSONB NOT NULL DEFAULT '{}'::jsonb,
48
+ embedding BYTEA,
49
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
50
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
51
+ PRIMARY KEY (id, tenant_id)
52
+ );
53
+
54
+ -- Relations table with tenant_id for multi-tenancy
55
+ CREATE TABLE IF NOT EXISTS graph_relations (
56
+ id TEXT NOT NULL,
57
+ tenant_id TEXT NOT NULL DEFAULT '', -- Empty string for global namespace
58
+ relation_type TEXT NOT NULL,
59
+ source_id TEXT NOT NULL,
60
+ target_id TEXT NOT NULL,
61
+ properties JSONB NOT NULL DEFAULT '{}'::jsonb,
62
+ weight REAL DEFAULT 1.0,
63
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
64
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
65
+ PRIMARY KEY (id, tenant_id)
66
+ );
67
+
68
+ -- Indexes for performance
69
+ CREATE INDEX IF NOT EXISTS idx_graph_entities_type ON graph_entities(entity_type);
70
+ CREATE INDEX IF NOT EXISTS idx_graph_entities_tenant ON graph_entities(tenant_id);
71
+ CREATE INDEX IF NOT EXISTS idx_graph_entities_tenant_type ON graph_entities(tenant_id, entity_type);
72
+ CREATE INDEX IF NOT EXISTS idx_graph_entities_properties ON graph_entities USING GIN(properties);
73
+ CREATE INDEX IF NOT EXISTS idx_graph_relations_type ON graph_relations(relation_type);
74
+ CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant ON graph_relations(tenant_id);
75
+ CREATE INDEX IF NOT EXISTS idx_graph_relations_source ON graph_relations(source_id);
76
+ CREATE INDEX IF NOT EXISTS idx_graph_relations_target ON graph_relations(target_id);
77
+ CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant_source ON graph_relations(tenant_id, source_id);
78
+ CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant_target ON graph_relations(tenant_id, target_id);
79
+ CREATE INDEX IF NOT EXISTS idx_graph_relations_properties ON graph_relations USING GIN(properties);
80
+
81
+ -- Optional: Add pgvector extension support (if available)
82
+ -- CREATE EXTENSION IF NOT EXISTS vector;
83
+ -- ALTER TABLE graph_entities ADD COLUMN IF NOT EXISTS embedding_vector vector(1536);
84
+ -- CREATE INDEX IF NOT EXISTS idx_graph_entities_embedding ON graph_entities USING ivfflat (embedding_vector vector_cosine_ops);
85
+ """
86
+
87
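+ # Migration SQL for existing databases: if tenant_id is missing, adds it (NOT NULL DEFAULT ''), replaces the primary key with the composite (id, tenant_id), and creates the tenant indexes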
88
+ MIGRATION_SQL = """
89
+ -- Add tenant_id column to entities if not exists
90
+ DO $$
91
+ BEGIN
92
+ IF NOT EXISTS (
93
+ SELECT 1 FROM information_schema.columns
94
+ WHERE table_name = 'graph_entities' AND column_name = 'tenant_id'
95
+ ) THEN
96
+ -- Add tenant_id column with empty string default
97
+ ALTER TABLE graph_entities ADD COLUMN tenant_id TEXT NOT NULL DEFAULT '';
98
+
99
+ -- Drop old primary key if exists
100
+ ALTER TABLE graph_entities DROP CONSTRAINT IF EXISTS graph_entities_pkey;
101
+
102
+ -- Create new composite primary key
103
+ ALTER TABLE graph_entities ADD PRIMARY KEY (id, tenant_id);
104
+
105
+ -- Create indexes
106
+ CREATE INDEX IF NOT EXISTS idx_graph_entities_tenant ON graph_entities(tenant_id);
107
+ CREATE INDEX IF NOT EXISTS idx_graph_entities_tenant_type ON graph_entities(tenant_id, entity_type);
108
+ END IF;
109
+ END $$;
110
+
111
+ -- Add tenant_id column to relations if not exists
112
+ DO $$
113
+ BEGIN
114
+ IF NOT EXISTS (
115
+ SELECT 1 FROM information_schema.columns
116
+ WHERE table_name = 'graph_relations' AND column_name = 'tenant_id'
117
+ ) THEN
118
+ -- Add tenant_id column with empty string default
119
+ ALTER TABLE graph_relations ADD COLUMN tenant_id TEXT NOT NULL DEFAULT '';
120
+
121
+ -- Drop old primary key if exists
122
+ ALTER TABLE graph_relations DROP CONSTRAINT IF EXISTS graph_relations_pkey;
123
+
124
+ -- Create new composite primary key
125
+ ALTER TABLE graph_relations ADD PRIMARY KEY (id, tenant_id);
126
+
127
+ -- Create indexes
128
+ CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant ON graph_relations(tenant_id);
129
+ CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant_source ON graph_relations(tenant_id, source_id);
130
+ CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant_target ON graph_relations(tenant_id, target_id);
131
+ END IF;
132
+ END $$;
133
+ """
134
+
135
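+ # RLS (Row-Level Security) policies for SHARED_SCHEMA mode: each tenant sees its own rows plus global rows (tenant_id = ''); an unset app.current_tenant_id yields NULL from current_setting(..., true), which COALESCE maps to ''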
136
+ RLS_SETUP_SQL = """
137
+ -- Enable RLS on tables
138
+ ALTER TABLE graph_entities ENABLE ROW LEVEL SECURITY;
139
+ ALTER TABLE graph_relations ENABLE ROW LEVEL SECURITY;
140
+
141
+ -- Force RLS even for table owners (important for superuser/owner connections)
142
+ ALTER TABLE graph_entities FORCE ROW LEVEL SECURITY;
143
+ ALTER TABLE graph_relations FORCE ROW LEVEL SECURITY;
144
+
145
+ -- Drop existing policies if they exist
146
+ DROP POLICY IF EXISTS tenant_isolation_entities ON graph_entities;
147
+ DROP POLICY IF EXISTS tenant_isolation_relations ON graph_relations;
148
+
149
+ -- Create RLS policies
150
+ -- Note: Uses current_setting('app.current_tenant_id', true) which returns empty string if not set
151
+ -- Empty string ('') represents the global namespace
152
+ CREATE POLICY tenant_isolation_entities ON graph_entities
153
+ USING (
154
+ tenant_id = '' OR
155
+ tenant_id = COALESCE(current_setting('app.current_tenant_id', true), '')
156
+ );
157
+
158
+ CREATE POLICY tenant_isolation_relations ON graph_relations
159
+ USING (
160
+ tenant_id = '' OR
161
+ tenant_id = COALESCE(current_setting('app.current_tenant_id', true), '')
162
+ );
163
+ """
164
+
165
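+ # Schema template for SEPARATE_SCHEMA mode: fill the {schema_name} placeholder (literal JSON braces are doubled as {{}} so formatting leaves them intact); tables carry no tenant_id column. NOTE(review): schema_name is spliced into the SQL as an identifier, so it must be validated/quoted by the caller - confirm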
166
+ TENANT_SCHEMA_SQL = """
167
+ -- Create tenant schema
168
+ CREATE SCHEMA IF NOT EXISTS {schema_name};
169
+
170
+ -- Entities table in tenant schema
171
+ CREATE TABLE IF NOT EXISTS {schema_name}.graph_entities (
172
+ id TEXT PRIMARY KEY,
173
+ entity_type TEXT NOT NULL,
174
+ properties JSONB NOT NULL DEFAULT '{{}}'::jsonb,
175
+ embedding BYTEA,
176
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
177
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
178
+ );
179
+
180
+ -- Relations table in tenant schema
181
+ CREATE TABLE IF NOT EXISTS {schema_name}.graph_relations (
182
+ id TEXT PRIMARY KEY,
183
+ relation_type TEXT NOT NULL,
184
+ source_id TEXT NOT NULL,
185
+ target_id TEXT NOT NULL,
186
+ properties JSONB NOT NULL DEFAULT '{{}}'::jsonb,
187
+ weight REAL DEFAULT 1.0,
188
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
189
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
190
+ );
191
+
192
+ -- Indexes
193
+ CREATE INDEX IF NOT EXISTS idx_{schema_name}_entities_type ON {schema_name}.graph_entities(entity_type);
194
+ CREATE INDEX IF NOT EXISTS idx_{schema_name}_relations_type ON {schema_name}.graph_relations(relation_type);
195
+ CREATE INDEX IF NOT EXISTS idx_{schema_name}_relations_source ON {schema_name}.graph_relations(source_id);
196
+ CREATE INDEX IF NOT EXISTS idx_{schema_name}_relations_target ON {schema_name}.graph_relations(target_id);
197
+ """
198
+
199
+
200
+ class PostgresGraphStore(GraphStore):
201
+ """
202
+ PostgreSQL-based graph storage implementation
203
+
204
+ Provides production-grade persistent graph storage with:
205
+ - Connection pooling via asyncpg
206
+ - ACID transactions
207
+ - SQL-optimized queries with recursive CTEs
208
+ - JSONB for flexible property storage
209
+ - Optional pgvector for vector similarity search
210
+
211
+ Features:
212
+ - Production-ready with connection pooling
213
+ - Efficient graph traversal using WITH RECURSIVE
214
+ - Automatic schema initialization
215
+ - Transaction support
216
+ - JSONB indexing for fast property queries
217
+
218
+ Multi-Tenancy Support:
219
+ - SHARED_SCHEMA mode: Single schema with tenant_id column + optional RLS
220
+ - SEPARATE_SCHEMA mode: PostgreSQL schemas per tenant (CREATE SCHEMA tenant_xxx)
221
+ - Global namespace for tenant_id=NULL (backward compatible)
222
+ - Row-Level Security (RLS) for automatic tenant filtering
223
+
224
+ Example:
225
+ ```python
226
+ from aiecs.infrastructure.graph_storage import PostgresGraphStore
227
+
228
+ # Using config from settings
229
+ store = PostgresGraphStore()
230
+ await store.initialize()
231
+
232
+ # Multi-tenant with RLS
233
+ store = PostgresGraphStore(
234
+ isolation_mode=TenantIsolationMode.SHARED_SCHEMA,
235
+ enable_rls=True
236
+ )
237
+ await store.initialize()
238
+
239
+ # Multi-tenant usage
240
+ from aiecs.infrastructure.graph_storage.tenant import TenantContext
241
+ context = TenantContext(tenant_id="acme-corp")
242
+ await store.add_entity(entity, context=context)
243
+
244
+ await store.close()
245
+ ```
246
+ """
247
+
248
def __init__(
    self,
    host: Optional[str] = None,
    port: Optional[int] = None,
    user: Optional[str] = None,
    password: Optional[str] = None,
    database: Optional[str] = None,
    min_pool_size: int = 5,
    max_pool_size: int = 20,
    enable_pgvector: bool = False,
    isolation_mode: TenantIsolationMode = TenantIsolationMode.SHARED_SCHEMA,
    enable_rls: bool = False,
    pool: Optional[asyncpg.Pool] = None,
    database_manager: Optional[Any] = None,
    **kwargs,
):
    """
    Initialize PostgreSQL graph store

    No connection is opened here; ``initialize()`` creates (or attaches to)
    the pool and sets up the schema.

    Args:
        host: PostgreSQL host (defaults from config)
        port: PostgreSQL port (defaults from config)
        user: PostgreSQL user (defaults from config)
        password: PostgreSQL password (defaults from config)
        database: Database name (defaults from config)
        min_pool_size: Minimum connection pool size
        max_pool_size: Maximum connection pool size
        enable_pgvector: Enable pgvector extension for vector search
        isolation_mode: Tenant isolation mode (SHARED_SCHEMA or SEPARATE_SCHEMA)
        enable_rls: Enable Row-Level Security for SHARED_SCHEMA mode
        pool: Optional existing asyncpg pool to reuse (from DatabaseManager)
        database_manager: Optional DatabaseManager instance to reuse its pool.
            When it exposes a ``connection_pool``, that pool takes precedence
            over ``pool``.
        **kwargs: Additional asyncpg connection parameters
    """
    super().__init__()

    # Multi-tenancy configuration
    self.isolation_mode = isolation_mode
    self.enable_rls = enable_rls
    self._initialized_tenant_schemas: set = set()  # Track created tenant schemas

    # Option 1: Reuse existing pool
    self._external_pool = pool

    # Option 2: Reuse DatabaseManager's pool
    if database_manager is not None:
        manager_pool = getattr(database_manager, "connection_pool", None)
        if manager_pool is not None:
            self._external_pool = manager_pool
            logger.info("Reusing DatabaseManager's connection pool")
        else:
            # The manager has no pool to share. Keep the explicit `pool`
            # if one was given; otherwise fall back to owning a pool so
            # that initialize() does not fail with "pool not initialized".
            logger.warning("DatabaseManager has no connection pool; falling back to explicit pool or own pool")

    # We own (create and close) the pool only when nothing external is usable
    self._owns_pool = self._external_pool is None

    # Load config from settings if not provided (needed for own pool creation)
    # Support both connection string (dsn) and individual parameters
    self.dsn = None
    if not all([host, port, user, password, database]):
        settings = get_settings()
        db_config = settings.database_config

        # Check if connection string (dsn) is provided (for cloud
        # databases)
        if "dsn" in db_config:
            self.dsn = db_config["dsn"]
            # Still set defaults for logging/display purposes
            host = host or "cloud"
            port = port or 5432
            user = user or "postgres"
            password = password or ""
            database = database or "aiecs"
        else:
            # Use individual parameters (for local databases)
            host = host or db_config.get("host", "localhost")
            port = port or db_config.get("port", 5432)
            user = user or db_config.get("user", "postgres")
            password = password or db_config.get("password", "")
            database = database or db_config.get("database", "aiecs")

    self.host = host
    self.port = port
    self.user = user
    self.password = password
    self.database = database
    self.min_pool_size = min_pool_size
    self.max_pool_size = max_pool_size
    self.enable_pgvector = enable_pgvector
    self.conn_kwargs = kwargs

    self.pool: Optional[asyncpg.Pool] = self._external_pool
    self._is_initialized = False
    # Connection bound by transaction(); None outside an explicit transaction
    self._transaction_conn: Optional[asyncpg.Connection] = None
338
+
339
+ def _ensure_pool(self) -> asyncpg.Pool:
340
+ """Ensure pool is initialized and return it."""
341
+ if self.pool is None:
342
+ raise RuntimeError("Connection pool not initialized")
343
+ return self.pool
344
+
345
async def initialize(self):
    """
    Initialize PostgreSQL connection pool and create schema

    Steps, in order:
      1. Create a pool (only when this store owns it) or attach to the
         external one.
      2. Optionally enable the pgvector extension.
      3. Run the multi-tenancy migration on an existing ``graph_entities``
         table, or create the schema on a fresh database.
      4. Optionally add the ``embedding_vector`` column and its index.
      5. Optionally install RLS policies (SHARED_SCHEMA mode only).

    pgvector, migration and RLS failures are logged and downgraded to
    warnings; any other failure is logged and re-raised.

    Raises:
        Exception: Whatever the pool creation or schema setup raised. A pool
            created by this call is closed before the exception propagates.
    """
    try:
        # Create connection pool only if we don't have an external one
        if self._owns_pool:
            # Use connection string (dsn) if available (for cloud databases)
            # Otherwise use individual parameters (for local databases)
            if self.dsn:
                self.pool = await asyncpg.create_pool(
                    dsn=self.dsn,
                    min_size=self.min_pool_size,
                    max_size=self.max_pool_size,
                    **self.conn_kwargs,
                )
                logger.info("PostgreSQL connection pool created using connection string (cloud/local)")
            else:
                self.pool = await asyncpg.create_pool(
                    host=self.host,
                    port=self.port,
                    user=self.user,
                    password=self.password,
                    database=self.database,
                    min_size=self.min_pool_size,
                    max_size=self.max_pool_size,
                    **self.conn_kwargs,
                )
                logger.info(f"PostgreSQL connection pool created: {self.host}:{self.port}/{self.database}")
        else:
            # close() drops our reference to a shared pool; re-attach so the
            # store can be initialized again after being closed.
            if self.pool is None:
                self.pool = self._external_pool
            logger.info("Using external PostgreSQL connection pool (shared with AIECS DatabaseManager)")

        # Create schema
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Optionally enable pgvector first
            if self.enable_pgvector:
                try:
                    await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
                    logger.info("pgvector extension enabled")
                except Exception as e:
                    logger.warning(f"Failed to enable pgvector: {e}. Continuing without vector support.")
                    self.enable_pgvector = False

            # Check if tables exist and need migration.
            # to_regclass resolves the name through the connection's
            # search_path, so a graph_entities table that lives only in a
            # tenant (or unrelated) schema is not mistaken for ours.
            tables_exist = await conn.fetchval("SELECT to_regclass('graph_entities') IS NOT NULL")

            if tables_exist:
                # Run migration for existing databases to add tenant_id
                try:
                    await conn.execute(MIGRATION_SQL)
                    logger.info("Database migration for multi-tenancy completed")
                except Exception as e:
                    logger.warning(f"Migration may have already been applied: {e}")
            else:
                # Execute schema creation for new databases
                await conn.execute(SCHEMA_SQL)

            # Add vector column if pgvector is enabled
            if self.enable_pgvector:
                try:
                    # Check if vector column exists
                    column_exists = await conn.fetchval(
                        """
                        SELECT EXISTS (
                            SELECT 1 FROM information_schema.columns
                            WHERE table_name = 'graph_entities'
                            AND column_name = 'embedding_vector'
                        )
                        """
                    )

                    if not column_exists:
                        # Add vector column (default dimension 1536, can be
                        # adjusted)
                        await conn.execute(
                            """
                            ALTER TABLE graph_entities
                            ADD COLUMN embedding_vector vector(1536)
                            """
                        )
                        logger.info("Added embedding_vector column")

                    # Create index if it doesn't exist
                    index_exists = await conn.fetchval(
                        """
                        SELECT EXISTS (
                            SELECT 1 FROM pg_indexes
                            WHERE tablename = 'graph_entities'
                            AND indexname = 'idx_graph_entities_embedding'
                        )
                        """
                    )

                    if not index_exists:
                        await conn.execute(
                            """
                            CREATE INDEX idx_graph_entities_embedding
                            ON graph_entities USING ivfflat (embedding_vector vector_cosine_ops)
                            WITH (lists = 100)
                            """
                        )
                        logger.info("Created vector similarity index")
                except Exception as e:
                    logger.warning(f"Failed to set up pgvector column/index: {e}")

            # Set up RLS if enabled for SHARED_SCHEMA mode
            if self.enable_rls and self.isolation_mode == TenantIsolationMode.SHARED_SCHEMA:
                try:
                    await conn.execute(RLS_SETUP_SQL)
                    logger.info("Row-Level Security (RLS) policies enabled")
                except Exception as e:
                    logger.warning(f"Failed to set up RLS: {e}. Continuing without RLS.")
                    self.enable_rls = False

        self._is_initialized = True
        self._initialized_tenant_schemas = set()
        logger.info("PostgreSQL graph store initialized successfully")

    except Exception as e:
        logger.error(f"Failed to initialize PostgreSQL graph store: {e}")
        # Do not leak a pool we created ourselves when setup fails half-way.
        if self._owns_pool and self.pool is not None:
            try:
                await self.pool.close()
            except Exception as close_error:
                logger.warning(f"Failed to close connection pool after initialization error: {close_error}")
            self.pool = None
            self._is_initialized = False
        raise
472
+
473
async def close(self):
    """Release the connection pool and mark the store as uninitialized.

    A pool created by this store is closed; a shared (external) pool is only
    detached and left open for its owner. Tenant-schema bookkeeping is reset
    in both cases.
    """
    if self.pool:
        if self._owns_pool:
            await self.pool.close()
            self.pool = None
            logger.info("PostgreSQL connection pool closed")
        else:
            logger.info("Detaching from shared PostgreSQL connection pool (not closing)")
            self.pool = None

    self._is_initialized = False
    self._initialized_tenant_schemas = set()
484
+
485
+ # =========================================================================
486
+ # Multi-Tenancy Helpers
487
+ # =========================================================================
488
+
489
+ def _get_tenant_id(self, context: Optional[TenantContext]) -> str:
490
+ """Extract tenant_id from context, returns empty string for global namespace."""
491
+ return context.tenant_id if context and context.tenant_id else ""
492
+
493
+ def _get_schema_name(self, tenant_id: Optional[str]) -> str:
494
+ """
495
+ Get schema name for SEPARATE_SCHEMA mode.
496
+
497
+ Returns 'public' for global namespace or 'tenant_xxx' for tenants.
498
+ """
499
+ if tenant_id is None:
500
+ return "public"
501
+ # Sanitize tenant_id for use in schema name
502
+ safe_tenant = tenant_id.replace("-", "_")
503
+ return f"tenant_{safe_tenant}"
504
+
505
async def _ensure_tenant_schema(self, conn: asyncpg.Connection, tenant_id: str) -> None:
    """
    Create the tenant's schema on first use (SEPARATE_SCHEMA mode only).

    Does nothing in other isolation modes, or when this store instance has
    already created the schema for ``tenant_id``.

    Args:
        conn: Connection used to run the schema DDL
        tenant_id: Tenant whose schema must exist
    """
    already_handled = (
        self.isolation_mode != TenantIsolationMode.SEPARATE_SCHEMA
        or tenant_id in self._initialized_tenant_schemas
    )
    if already_handled:
        return

    schema_name = self._get_schema_name(tenant_id)
    await conn.execute(TENANT_SCHEMA_SQL.format(schema_name=schema_name))

    # Remember the tenant so the DDL is not re-run on every call
    self._initialized_tenant_schemas.add(tenant_id)
    logger.info(f"Created tenant schema: {schema_name}")
521
+
522
async def _set_tenant_context(self, conn: asyncpg.Connection, tenant_id: str) -> None:
    """
    Set tenant context for RLS or search_path based on isolation mode.

    For SEPARATE_SCHEMA: search_path is set to ``tenant_xxx, public``
    (``public`` alone for the global namespace).
    For SHARED_SCHEMA with RLS: ``app.current_tenant_id`` is set for the
    current transaction only, so the caller must already be inside one.
    For SHARED_SCHEMA without RLS: nothing is done.

    Values are passed to ``set_config`` as bind parameters instead of being
    formatted into the statement, so a tenant id can never terminate the
    statement or inject SQL.

    Args:
        conn: Connection whose session/transaction settings are changed
        tenant_id: Tenant identifier, empty string for the global namespace
    """
    if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
        schema_name = self._get_schema_name(tenant_id if tenant_id else None)
        # NOTE(review): `public` stays on the path, so unqualified table names
        # resolve to the global tables when the tenant schema does not exist
        # yet - confirm this fallback is intended for reads and deletes.
        # is_local=false mirrors a plain (session-level) SET search_path
        await conn.execute("SELECT set_config('search_path', $1, false)", f"{schema_name}, public")
        logger.debug(f"Set search_path to {schema_name}")
    elif self.enable_rls:
        # Set app.current_tenant_id for RLS policies (empty string for global)
        # is_local=true is the equivalent of SET LOCAL: scoped to the current
        # transaction so the value does not leak to the next pool user
        applied = await conn.fetchval("SELECT set_config('app.current_tenant_id', $1, true)", tenant_id)
        logger.debug(f"Set LOCAL app.current_tenant_id = '{tenant_id}'")
        # set_config returns the value now in effect, which doubles as verification
        logger.debug(f"Verified app.current_tenant_id = '{applied}'")
541
+
542
+ def _build_tenant_filter(self, tenant_id: str, table_alias: str = "") -> Tuple[str, List]:
543
+ """
544
+ Build SQL WHERE clause for tenant filtering in SHARED_SCHEMA mode without RLS.
545
+
546
+ Returns:
547
+ Tuple of (WHERE clause fragment, parameters list)
548
+ """
549
+ prefix = f"{table_alias}." if table_alias else ""
550
+ # tenant_id is always a string, empty string for global namespace
551
+ return f"{prefix}tenant_id = ${{param}}", [tenant_id]
552
+
553
@asynccontextmanager
async def transaction(self):
    """
    Run a group of store operations atomically.

    A connection is taken from the pool, a database transaction is opened on
    it, and the connection is published on the store so that operations
    called inside the ``async with`` body reuse it. The previously published
    connection (if any) is restored on exit.

    Usage:
        ```python
        async with store.transaction():
            await store.add_entity(entity1)
            await store.add_entity(entity2)
            # Both entities added atomically
        ```

    Yields:
        The connection the transaction runs on.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    async with self._ensure_pool().acquire() as tx_conn, tx_conn.transaction():
        # Publish the connection for the duration of the block
        previous = self._transaction_conn
        self._transaction_conn = tx_conn
        try:
            yield tx_conn
        finally:
            self._transaction_conn = previous
579
+
580
+ async def _get_connection(self):
581
+ """Get connection from pool or transaction"""
582
+ if self._transaction_conn:
583
+ return self._transaction_conn
584
+ return self.pool.acquire()
585
+
586
+ # =========================================================================
587
+ # Tier 1: Basic Interface (PostgreSQL-optimized implementations)
588
+ # =========================================================================
589
+
590
async def add_entity(self, entity: Entity, context: Optional[TenantContext] = None) -> None:
    """
    Insert an entity, or overwrite it when the id already exists (upsert).

    Args:
        entity: Entity to add
        context: Optional tenant context for multi-tenant isolation

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)
    logger.debug(f"add_entity called with entity_id='{entity.id}', tenant_id='{tenant_id}', enable_rls={self.enable_rls}")

    # Stamp the entity with the tenant when it carries none.
    # NOTE(review): tenant_id is always a str here, so the first half of this
    # condition is always true and global entities get tenant_id == "".
    if tenant_id is not None and entity.tenant_id is None:
        entity.tenant_id = tenant_id

    # Serialize data
    properties_json = json.dumps(entity.properties)
    embedding_blob = self._serialize_embedding(entity.embedding) if entity.embedding else None

    async def _upsert(db: asyncpg.Connection):
        separate_schema = self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA

        # Tenant schemas are created lazily on first write
        if separate_schema and tenant_id:
            await self._ensure_tenant_schema(db, tenant_id)

        if separate_schema:
            # SEPARATE_SCHEMA: No tenant_id column
            await db.execute(
                """
                INSERT INTO graph_entities (id, entity_type, properties, embedding)
                VALUES ($1, $2, $3::jsonb, $4)
                ON CONFLICT (id) DO UPDATE SET
                    entity_type = EXCLUDED.entity_type,
                    properties = EXCLUDED.properties,
                    embedding = EXCLUDED.embedding,
                    updated_at = CURRENT_TIMESTAMP
                """,
                entity.id,
                entity.entity_type,
                properties_json,
                embedding_blob,
            )
            return

        # SHARED_SCHEMA: Include tenant_id column
        await db.execute(
            """
            INSERT INTO graph_entities (id, tenant_id, entity_type, properties, embedding)
            VALUES ($1, $2, $3, $4::jsonb, $5)
            ON CONFLICT (id, tenant_id) DO UPDATE SET
                entity_type = EXCLUDED.entity_type,
                properties = EXCLUDED.properties,
                embedding = EXCLUDED.embedding,
                updated_at = CURRENT_TIMESTAMP
            """,
            entity.id,
            tenant_id,
            entity.entity_type,
            properties_json,
            embedding_blob,
        )

    if self._transaction_conn:
        # Inside store.transaction(): reuse its connection
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        await _upsert(self._transaction_conn)
        return

    async with self._ensure_pool().acquire() as db:
        if not self.enable_rls:
            await self._set_tenant_context(db, tenant_id)
            await _upsert(db)
            return
        # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
        async with db.transaction():
            await self._set_tenant_context(db, tenant_id)
            await _upsert(db)
667
+
668
async def get_entity(self, entity_id: str, context: Optional[TenantContext] = None) -> Optional[Entity]:
    """
    Fetch a single entity by id.

    Args:
        entity_id: Entity ID to retrieve
        context: Optional tenant context for multi-tenant isolation

    Returns:
        The entity, or None when no matching row is visible to the tenant.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    async def _load(db: asyncpg.Connection):
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
            # Tenant tables have no tenant_id column
            sql = """
                SELECT id, entity_type, properties, embedding
                FROM graph_entities
                WHERE id = $1
                """
            args: tuple = (entity_id,)
        elif self.enable_rls:
            # RLS will filter automatically
            sql = """
                SELECT id, tenant_id, entity_type, properties, embedding
                FROM graph_entities
                WHERE id = $1
                """
            args = (entity_id,)
        else:
            # Manual tenant filtering (tenant_id is always a string, '' for global)
            sql = """
                SELECT id, tenant_id, entity_type, properties, embedding
                FROM graph_entities
                WHERE id = $1 AND tenant_id = $2
                """
            args = (entity_id, tenant_id)
        return await db.fetchrow(sql, *args)

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        row = await _load(self._transaction_conn)
    else:
        async with self._ensure_pool().acquire() as db:
            if self.enable_rls:
                # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
                async with db.transaction():
                    await self._set_tenant_context(db, tenant_id)
                    row = await _load(db)
            else:
                await self._set_tenant_context(db, tenant_id)
                row = await _load(db)

    if not row:
        return None

    # Deserialize: properties may arrive as a JSON string or already decoded
    raw_properties = row["properties"]
    properties = json.loads(raw_properties) if isinstance(raw_properties, str) else raw_properties

    embedding: Optional[List[float]] = None
    if row["embedding"]:
        vector = self._deserialize_embedding(row["embedding"])
        if vector is not None:
            embedding = cast(List[float], vector.tolist())

    # Get tenant_id from row or context
    row_tenant_id = row.get("tenant_id") if "tenant_id" in row.keys() else tenant_id

    return Entity(
        id=row["id"],
        entity_type=row["entity_type"],
        properties=properties,
        embedding=embedding,
        tenant_id=row_tenant_id,
    )
746
+
747
async def update_entity(self, entity: Entity, context: Optional[TenantContext] = None) -> None:
    """
    Overwrite the type, properties and embedding of an existing entity.

    Args:
        entity: Entity to update
        context: Optional tenant context for multi-tenant isolation

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If no row was updated (entity not found for this tenant).
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)
    properties_json = json.dumps(entity.properties)
    embedding_blob = self._serialize_embedding(entity.embedding) if entity.embedding else None

    async def _apply(db: asyncpg.Connection):
        # SEPARATE_SCHEMA and RLS both scope the statement through the
        # connection settings, so the same unfiltered UPDATE serves both.
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA or self.enable_rls:
            return await db.execute(
                """
                UPDATE graph_entities
                SET entity_type = $2, properties = $3::jsonb, embedding = $4, updated_at = CURRENT_TIMESTAMP
                WHERE id = $1
                """,
                entity.id,
                entity.entity_type,
                properties_json,
                embedding_blob,
            )

        # Manual tenant filtering (tenant_id is always a string, '' for global)
        return await db.execute(
            """
            UPDATE graph_entities
            SET entity_type = $2, properties = $3::jsonb, embedding = $4, updated_at = CURRENT_TIMESTAMP
            WHERE id = $1 AND tenant_id = $5
            """,
            entity.id,
            entity.entity_type,
            properties_json,
            embedding_blob,
            tenant_id,
        )

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        status = await _apply(self._transaction_conn)
    else:
        async with self._ensure_pool().acquire() as db:
            if self.enable_rls:
                # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
                async with db.transaction():
                    await self._set_tenant_context(db, tenant_id)
                    status = await _apply(db)
            else:
                await self._set_tenant_context(db, tenant_id)
                status = await _apply(db)

    # The command status tag reports how many rows were touched
    if status == "UPDATE 0":
        raise ValueError(f"Entity with ID '{entity.id}' not found")
819
+
820
async def delete_entity(self, entity_id: str, context: Optional[TenantContext] = None) -> None:
    """
    Delete entity from PostgreSQL database

    Relations that reference the entity (as source or target) are deleted
    first, then the entity row itself. Both statements run in a single
    database transaction so a failure cannot leave the entity behind with
    its relations already removed.

    Args:
        entity_id: Entity ID to delete
        context: Optional tenant context for multi-tenant isolation

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If no entity row was deleted (not found for this tenant).
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    async def _execute(conn: asyncpg.Connection):
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA or self.enable_rls:
            # Delete relations first
            await conn.execute(
                "DELETE FROM graph_relations WHERE source_id = $1 OR target_id = $1",
                entity_id,
            )
            return await conn.execute("DELETE FROM graph_entities WHERE id = $1", entity_id)

        # Manual tenant filtering (tenant_id is always a string, '' for global)
        await conn.execute(
            "DELETE FROM graph_relations WHERE (source_id = $1 OR target_id = $1) AND tenant_id = $2",
            entity_id,
            tenant_id,
        )
        return await conn.execute(
            "DELETE FROM graph_entities WHERE id = $1 AND tenant_id = $2",
            entity_id,
            tenant_id,
        )

    if self._transaction_conn:
        # Already inside store.transaction(): atomicity is provided by it
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        result = await _execute(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Always use a transaction: the two DELETEs must commit or roll
            # back together, and SET LOCAL (RLS) requires one anyway.
            async with conn.transaction():
                await self._set_tenant_context(conn, tenant_id)
                result = await _execute(conn)

    # Status tag of the entity DELETE; "DELETE 0" means nothing matched
    if result == "DELETE 0":
        raise ValueError(f"Entity with ID '{entity_id}' not found")
871
+
872
async def add_relation(self, relation: Relation, context: Optional[TenantContext] = None) -> None:
    """
    Insert a relation, or overwrite it when the id already exists (upsert).

    Both endpoint entities are looked up with the same tenant context before
    the relation is written.

    Args:
        relation: Relation to add
        context: Optional tenant context for multi-tenant isolation

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If the source or target entity does not exist.
        CrossTenantRelationError: If source and target entities belong to different tenants
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    # Check entities exist and enforce same-tenant constraint
    source_entity = await self.get_entity(relation.source_id, context=context)
    target_entity = await self.get_entity(relation.target_id, context=context)

    if not source_entity:
        raise ValueError(f"Source entity '{relation.source_id}' does not exist")
    if not target_entity:
        raise ValueError(f"Target entity '{relation.target_id}' does not exist")

    # Enforce same-tenant constraint (skip for global namespace which has empty tenant_id)
    if tenant_id and source_entity.tenant_id != target_entity.tenant_id:
        raise CrossTenantRelationError(source_entity.tenant_id, target_entity.tenant_id)

    # Set tenant_id on relation
    if relation.tenant_id is None:
        relation.tenant_id = tenant_id

    properties_json = json.dumps(relation.properties)

    async def _upsert(db: asyncpg.Connection):
        separate_schema = self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA

        # Tenant schemas are created lazily on first write
        if separate_schema and tenant_id:
            await self._ensure_tenant_schema(db, tenant_id)

        if separate_schema:
            await db.execute(
                """
                INSERT INTO graph_relations (id, relation_type, source_id, target_id, properties, weight)
                VALUES ($1, $2, $3, $4, $5::jsonb, $6)
                ON CONFLICT (id) DO UPDATE SET
                    relation_type = EXCLUDED.relation_type,
                    source_id = EXCLUDED.source_id,
                    target_id = EXCLUDED.target_id,
                    properties = EXCLUDED.properties,
                    weight = EXCLUDED.weight,
                    updated_at = CURRENT_TIMESTAMP
                """,
                relation.id,
                relation.relation_type,
                relation.source_id,
                relation.target_id,
                properties_json,
                relation.weight,
            )
            return

        await db.execute(
            """
            INSERT INTO graph_relations (id, tenant_id, relation_type, source_id, target_id, properties, weight)
            VALUES ($1, $2, $3, $4, $5, $6::jsonb, $7)
            ON CONFLICT (id, tenant_id) DO UPDATE SET
                relation_type = EXCLUDED.relation_type,
                source_id = EXCLUDED.source_id,
                target_id = EXCLUDED.target_id,
                properties = EXCLUDED.properties,
                weight = EXCLUDED.weight,
                updated_at = CURRENT_TIMESTAMP
            """,
            relation.id,
            tenant_id,
            relation.relation_type,
            relation.source_id,
            relation.target_id,
            properties_json,
            relation.weight,
        )

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        await _upsert(self._transaction_conn)
        return

    async with self._ensure_pool().acquire() as db:
        if not self.enable_rls:
            await self._set_tenant_context(db, tenant_id)
            await _upsert(db)
            return
        # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
        async with db.transaction():
            await self._set_tenant_context(db, tenant_id)
            await _upsert(db)
968
+
969
async def get_relation(self, relation_id: str, context: Optional[TenantContext] = None) -> Optional[Relation]:
    """
    Get relation from PostgreSQL database

    Args:
        relation_id: Relation ID to retrieve
        context: Optional tenant context for multi-tenant isolation

    Returns:
        The relation, or None when no matching row is visible to the tenant.
        A NULL weight is reported as 1.0; a stored weight of 0 is preserved.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    async def _fetch(conn: asyncpg.Connection):
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
            # Tenant tables have no tenant_id column
            return await conn.fetchrow(
                """
                SELECT id, relation_type, source_id, target_id, properties, weight
                FROM graph_relations
                WHERE id = $1
                """,
                relation_id,
            )
        elif self.enable_rls:
            # RLS policies restrict the visible rows
            return await conn.fetchrow(
                """
                SELECT id, tenant_id, relation_type, source_id, target_id, properties, weight
                FROM graph_relations
                WHERE id = $1
                """,
                relation_id,
            )
        else:
            # Manual tenant filtering (tenant_id is always a string, '' for global)
            return await conn.fetchrow(
                """
                SELECT id, tenant_id, relation_type, source_id, target_id, properties, weight
                FROM graph_relations
                WHERE id = $1 AND tenant_id = $2
                """,
                relation_id,
                tenant_id,
            )

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        row = await _fetch(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    row = await _fetch(conn)
            else:
                await self._set_tenant_context(conn, tenant_id)
                row = await _fetch(conn)

    if not row:
        return None

    properties = json.loads(row["properties"]) if isinstance(row["properties"], str) else row["properties"]
    row_tenant_id = row.get("tenant_id") if "tenant_id" in row.keys() else tenant_id

    # Only a missing (NULL) weight falls back to the default. A truthiness
    # test would turn a legitimate weight of 0.0 into 1.0.
    stored_weight = row["weight"]
    weight = float(stored_weight) if stored_weight is not None else 1.0

    return Relation(
        id=row["id"],
        relation_type=row["relation_type"],
        source_id=row["source_id"],
        target_id=row["target_id"],
        properties=properties,
        weight=weight,
        tenant_id=row_tenant_id,
    )
1043
+
1044
async def delete_relation(self, relation_id: str, context: Optional[TenantContext] = None) -> None:
    """
    Remove a single relation by id.

    Args:
        relation_id: Relation ID to delete
        context: Optional tenant context for multi-tenant isolation

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If no row was deleted (relation not found for this tenant).
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    async def _remove(db: asyncpg.Connection):
        scoped_by_connection = self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA or self.enable_rls
        if not scoped_by_connection:
            # Manual tenant filtering (tenant_id is always a string, '' for global)
            return await db.execute(
                "DELETE FROM graph_relations WHERE id = $1 AND tenant_id = $2",
                relation_id,
                tenant_id,
            )
        return await db.execute("DELETE FROM graph_relations WHERE id = $1", relation_id)

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        status = await _remove(self._transaction_conn)
    else:
        async with self._ensure_pool().acquire() as db:
            if self.enable_rls:
                # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
                async with db.transaction():
                    await self._set_tenant_context(db, tenant_id)
                    status = await _remove(db)
            else:
                await self._set_tenant_context(db, tenant_id)
                status = await _remove(db)

    # The command status tag reports how many rows were removed
    if status == "DELETE 0":
        raise ValueError(f"Relation with ID '{relation_id}' not found")
1085
+
1086
async def get_neighbors(
    self,
    entity_id: str,
    relation_type: Optional[str] = None,
    direction: str = "outgoing",
    context: Optional[TenantContext] = None,
) -> List[Entity]:
    """
    Get neighboring entities (optimized with SQL)

    A single SELECT is issued. Its clauses are assembled directly from the
    isolation settings instead of patching a finished SQL string, so every
    filter that is requested is guaranteed to appear in the statement.

    Args:
        entity_id: ID of entity to get neighbors for
        relation_type: Optional filter by relation type (a falsy value
            disables the filter)
        direction: "outgoing", "incoming", or "both"; any other value is
            treated like "both"
        context: Optional tenant context for multi-tenant isolation

    Returns:
        The distinct entities linked to ``entity_id`` in the requested
        direction.

    Raises:
        RuntimeError: If the store has not been initialized
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)
    separate_schema = self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA
    # For SEPARATE_SCHEMA or RLS, the context handles filtering
    use_tenant_filter = not (separate_schema or self.enable_rls)

    # Positional parameters: $1 is always the entity id; the tenant id and the
    # relation type take the next free slots, in that order, when present.
    params: List[Any] = [entity_id]
    tenant_ref = ""
    if use_tenant_filter:
        params.append(tenant_id)
        tenant_ref = f"${len(params)}"
    type_ref = ""
    if relation_type:
        params.append(relation_type)
        type_ref = f"${len(params)}"

    # No tenant_id column is selected in SEPARATE_SCHEMA mode
    if separate_schema:
        columns = "e.id, e.entity_type, e.properties, e.embedding"
    else:
        columns = "e.id, e.tenant_id, e.entity_type, e.properties, e.embedding"

    if direction in ("outgoing", "incoming"):
        # The neighbor sits on the opposite end of the edge from entity_id.
        if direction == "outgoing":
            neighbor_col, anchor_col = "target_id", "source_id"
        else:
            neighbor_col, anchor_col = "source_id", "target_id"
        query = (
            f"SELECT DISTINCT {columns} "
            "FROM graph_entities e "
            f"JOIN graph_relations r ON e.id = r.{neighbor_col} "
            f"WHERE r.{anchor_col} = $1"
        )
        if tenant_ref:
            query += f" AND e.tenant_id = {tenant_ref} AND r.tenant_id = {tenant_ref}"
        if type_ref:
            query += f" AND r.relation_type = {type_ref}"
    else:  # both
        edge_filter = ""
        if tenant_ref:
            edge_filter += f" AND tenant_id = {tenant_ref}"
        if type_ref:
            edge_filter += f" AND relation_type = {type_ref}"
        query = (
            f"SELECT DISTINCT {columns} "
            "FROM graph_entities e "
            "WHERE e.id IN ("
            f"SELECT target_id FROM graph_relations WHERE source_id = $1{edge_filter} "
            "UNION "
            f"SELECT source_id FROM graph_relations WHERE target_id = $1{edge_filter}"
            ")"
        )
        if tenant_ref:
            query += f" AND e.tenant_id = {tenant_ref}"

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        rows = await self._transaction_conn.fetch(query, *params)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    rows = await conn.fetch(query, *params)
            else:
                await self._set_tenant_context(conn, tenant_id)
                rows = await conn.fetch(query, *params)

    entities: List[Entity] = []
    for row in rows:
        # properties may arrive as JSON text or as an already-decoded value
        raw_properties = row["properties"]
        properties = json.loads(raw_properties) if isinstance(raw_properties, str) else raw_properties
        embedding_raw = self._deserialize_embedding(row["embedding"]) if row["embedding"] else None
        embedding: Optional[List[float]] = cast(List[float], embedding_raw.tolist()) if embedding_raw is not None else None
        # Rows without a tenant_id column inherit the requested tenant.
        row_tenant_id = row.get("tenant_id") if "tenant_id" in row.keys() else tenant_id
        entities.append(
            Entity(
                id=row["id"],
                entity_type=row["entity_type"],
                properties=properties,
                embedding=embedding,
                tenant_id=row_tenant_id,
            )
        )

    return entities
1239
+
1240
async def get_all_entities(
    self,
    entity_type: Optional[str] = None,
    limit: Optional[int] = None,
    context: Optional[TenantContext] = None,
) -> List[Entity]:
    """
    Get all entities, optionally filtered by type

    Args:
        entity_type: Optional filter by entity type (a falsy value disables
            the filter)
        limit: Optional limit on number of entities. ``None`` means no
            limit; ``0`` is passed through to SQL and therefore yields no rows.
        context: Optional tenant context for multi-tenant isolation

    Returns:
        The matching entities. No ORDER BY is issued, so the order (and,
        with ``limit``, the selection) is whatever the database returns.

    Raises:
        RuntimeError: If the store has not been initialized
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)
    # Kept as an f-string so the enum is rendered exactly as before.
    logger.debug(f"get_all_entities called with tenant_id='{tenant_id}', enable_rls={self.enable_rls}, isolation_mode={self.isolation_mode}")

    separate_schema = self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA

    # No tenant_id column is selected in SEPARATE_SCHEMA mode
    if separate_schema:
        columns = "id, entity_type, properties, embedding"
    else:
        columns = "id, tenant_id, entity_type, properties, embedding"

    params: List[Any] = []
    conditions: List[str] = []
    if not (separate_schema or self.enable_rls):
        # Manual tenant filtering (tenant_id is always a string, '' for global)
        params.append(tenant_id)
        conditions.append(f"tenant_id = ${len(params)}")
    if entity_type:
        params.append(entity_type)
        conditions.append(f"entity_type = ${len(params)}")

    query = f"SELECT {columns} FROM graph_entities"
    if conditions:
        query += " WHERE " + " AND ".join(conditions)
    # Compare against None: a truthiness test would silently turn limit=0
    # into "no limit" and return every row.
    if limit is not None:
        params.append(limit)
        query += f" LIMIT ${len(params)}"

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        rows = await self._transaction_conn.fetch(query, *params)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    rows = await conn.fetch(query, *params)
            else:
                await self._set_tenant_context(conn, tenant_id)
                rows = await conn.fetch(query, *params)

    logger.debug("get_all_entities query returned %s rows", len(rows))
    entities: List[Entity] = []
    for row in rows:
        # properties may arrive as JSON text or as an already-decoded value
        raw_properties = row["properties"]
        properties = json.loads(raw_properties) if isinstance(raw_properties, str) else raw_properties
        embedding_raw = self._deserialize_embedding(row["embedding"]) if row["embedding"] else None
        embedding: Optional[List[float]] = cast(List[float], embedding_raw.tolist()) if embedding_raw is not None else None
        # Rows without a tenant_id column inherit the requested tenant.
        row_tenant_id = row.get("tenant_id") if "tenant_id" in row.keys() else tenant_id
        # Lazy %-formatting: this runs once per row, so skip the string
        # build entirely when debug logging is off.
        logger.debug("Retrieved entity id='%s', tenant_id='%s'", row["id"], row_tenant_id)
        entities.append(
            Entity(
                id=row["id"],
                entity_type=row["entity_type"],
                properties=properties,
                embedding=embedding,
                tenant_id=row_tenant_id,
            )
        )

    logger.debug("get_all_entities returning %s entities for requested tenant_id='%s'", len(entities), tenant_id)
    return entities
1333
+
1334
async def get_stats(self, context: Optional[TenantContext] = None) -> Dict[str, Any]:
    """
    Get graph statistics

    Runs four aggregate queries (entity total, relation total, and a count
    per entity type and per relation type) and reports them together with
    pool and isolation settings.

    Args:
        context: Optional tenant context for tenant-scoped stats

    Returns:
        Dict with the keys ``entity_count``, ``relation_count``,
        ``entity_types``, ``relation_types``, ``backend``, ``pool_size``,
        ``isolation_mode``, ``tenant_id`` and ``enable_rls``.

    Raises:
        RuntimeError: If the store has not been initialized
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    async def _collect(db: asyncpg.Connection):
        # In SEPARATE_SCHEMA or RLS mode the statements carry no tenant
        # predicate; otherwise each one is filtered on tenant_id
        # (tenant_id is always a string, '' for global).
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA or self.enable_rls:
            where_clause = ""
            bind_args: tuple = ()
        else:
            where_clause = " WHERE tenant_id = $1"
            bind_args = (tenant_id,)

        total_entities = await db.fetchval(f"SELECT COUNT(*) FROM graph_entities{where_clause}", *bind_args)
        total_relations = await db.fetchval(f"SELECT COUNT(*) FROM graph_relations{where_clause}", *bind_args)
        by_entity_type = await db.fetch(
            f"SELECT entity_type, COUNT(*) as count FROM graph_entities{where_clause} GROUP BY entity_type",
            *bind_args,
        )
        by_relation_type = await db.fetch(
            f"SELECT relation_type, COUNT(*) as count FROM graph_relations{where_clause} GROUP BY relation_type",
            *bind_args,
        )
        return total_entities, total_relations, by_entity_type, by_relation_type

    active_txn = self._transaction_conn
    if active_txn:
        await self._set_tenant_context(active_txn, tenant_id)
        collected = await _collect(active_txn)
    else:
        async with self._ensure_pool().acquire() as pooled:
            if self.enable_rls:
                # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
                async with pooled.transaction():
                    await self._set_tenant_context(pooled, tenant_id)
                    collected = await _collect(pooled)
            else:
                await self._set_tenant_context(pooled, tenant_id)
                collected = await _collect(pooled)

    entity_count, relation_count, entity_types, relation_types = collected

    entity_type_counts = {}
    for record in entity_types:
        entity_type_counts[record["entity_type"]] = record["count"]
    relation_type_counts = {}
    for record in relation_types:
        relation_type_counts[record["relation_type"]] = record["count"]

    if self.pool:
        pool_usage = f"{self.pool.get_size()}/{self.max_pool_size}"
    else:
        pool_usage = "0/0"

    return {
        "entity_count": entity_count,
        "relation_count": relation_count,
        "entity_types": entity_type_counts,
        "relation_types": relation_type_counts,
        "backend": "postgresql",
        "pool_size": pool_usage,
        "isolation_mode": self.isolation_mode.value,
        "tenant_id": tenant_id,
        "enable_rls": self.enable_rls,
    }
1387
+
1388
async def clear(self, context: Optional[TenantContext] = None) -> None:
    """
    Clear data from PostgreSQL database

    Args:
        context: Optional tenant context for multi-tenant isolation.
            If provided, clears only data for the specified tenant.
            If None (no context), clears ALL data across all tenants.

    Raises:
        RuntimeError: If the store has not been initialized
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Note: context=None means clear ALL data, not just global namespace
    clear_all = context is None
    tenant_id = self._get_tenant_id(context)
    separate_schema = self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA

    async def _execute(conn: asyncpg.Connection) -> None:
        if clear_all:
            # Clear all data across all tenants
            await conn.execute("DELETE FROM graph_relations")
            await conn.execute("DELETE FROM graph_entities")

            # Drop tenant schemas for SEPARATE_SCHEMA mode
            if separate_schema:
                # In LIKE, "_" matches any single character, so a bare
                # 'tenant_%' would also select schemas such as "tenants" or
                # "tenantdata". Escape it so only the literal "tenant_"
                # prefix qualifies for the destructive DROP below.
                schemas = await conn.fetch(
                    "SELECT schema_name FROM information_schema.schemata "
                    "WHERE schema_name LIKE 'tenant!_%' ESCAPE '!'"
                )
                for row in schemas:
                    # Catalog names are exact; quote them so mixed-case or
                    # special characters can neither break nor extend the
                    # statement.
                    quoted_schema = '"' + row["schema_name"].replace('"', '""') + '"'
                    await conn.execute(f"DROP SCHEMA IF EXISTS {quoted_schema} CASCADE")
                self._initialized_tenant_schemas.clear()
        elif separate_schema:
            # Drop tenant schema
            # NOTE(review): the name is interpolated unquoted, exactly as
            # before; this relies on _get_schema_name returning a safe
            # identifier - confirm against that helper.
            schema_name = self._get_schema_name(tenant_id if tenant_id else None)
            await conn.execute(f"DROP SCHEMA IF EXISTS {schema_name} CASCADE")
            self._initialized_tenant_schemas.discard(tenant_id)
        elif self.enable_rls:
            # RLS will filter automatically
            await conn.execute("DELETE FROM graph_relations")
            await conn.execute("DELETE FROM graph_entities")
        else:
            # Manual tenant filtering (tenant_id is string, '' for global)
            await conn.execute("DELETE FROM graph_relations WHERE tenant_id = $1", tenant_id)
            await conn.execute("DELETE FROM graph_entities WHERE tenant_id = $1", tenant_id)

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        await _execute(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls and not clear_all:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    await _execute(conn)
            else:
                # A full wipe deliberately runs without a tenant context.
                if not clear_all:
                    await self._set_tenant_context(conn, tenant_id)
                await _execute(conn)
1448
+
1449
+ # =========================================================================
1450
+ # Tier 2: Advanced Interface (PostgreSQL-optimized with recursive CTEs)
1451
+ # =========================================================================
1452
+
1453
async def find_paths(
    self,
    source_id: str,
    target_id: str,
    max_depth: int = 3,
    limit: Optional[int] = 10,
) -> List[Path]:
    """
    Find paths using WITH RECURSIVE CTE (PostgreSQL-optimized)

    This overrides the default implementation with an efficient
    recursive SQL query.

    Args:
        source_id: ID of the entity the paths start from
        target_id: ID of the entity the paths must end at
        max_depth: Maximum number of relations in a path
        limit: Maximum number of paths; a falsy value (None or 0) falls
            back to 10

    Returns:
        Paths ordered by the database from shortest to longest. Entities
        and relations that occur in several paths are fetched once, so the
        returned paths share those objects.

    Raises:
        RuntimeError: If the store has not been initialized

    NOTE(review): unlike the sibling read methods, no tenant context is
    applied to the connection here - confirm whether that is intended.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Recursive CTE to find all paths
    query = """
        WITH RECURSIVE paths AS (
            -- Base case: direct connections
            SELECT
                r.source_id,
                r.target_id,
                r.relation_type,
                ARRAY[r.source_id] as path_nodes,
                ARRAY[r.id] as path_relations,
                1 as depth
            FROM graph_relations r
            WHERE r.source_id = $1

            UNION ALL

            -- Recursive case: extend paths
            SELECT
                p.source_id,
                r.target_id,
                r.relation_type,
                p.path_nodes || r.source_id,
                p.path_relations || r.id,
                p.depth + 1
            FROM paths p
            JOIN graph_relations r ON p.target_id = r.source_id
            WHERE p.depth < $3
              AND p.target_id <> $2  -- Stop extending once the target is reached
              AND NOT (r.source_id = ANY(p.path_nodes))  -- Avoid cycles
        )
        SELECT DISTINCT
            path_nodes || target_id as nodes,
            path_relations as relations,
            depth
        FROM paths
        WHERE target_id = $2
        ORDER BY depth ASC
        LIMIT $4
    """
    # Without the "stop at target" guard a path that already arrived at the
    # target kept growing and could come back to it (S -> T -> A -> T), which
    # was then reported as a path even though it contains a cycle.
    query_args = (source_id, target_id, max_depth, limit or 10)

    if self._transaction_conn:
        rows = await self._transaction_conn.fetch(query, *query_args)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            rows = await conn.fetch(query, *query_args)

    # Per-call memo: the source entity appears in every row and paths often
    # overlap, so look each id up only once.
    entity_cache: Dict[Any, Any] = {}
    relation_cache: Dict[Any, Any] = {}

    paths = []
    for row in rows:
        # Fetch entities and relations; ids that no longer resolve are skipped
        entities = []
        for node_id in row["nodes"]:
            if node_id not in entity_cache:
                entity_cache[node_id] = await self.get_entity(node_id)
            entity = entity_cache[node_id]
            if entity:
                entities.append(entity)

        relations = []
        for rel_id in row["relations"]:
            if rel_id not in relation_cache:
                relation_cache[rel_id] = await self.get_relation(rel_id)
            relation = relation_cache[rel_id]
            if relation:
                relations.append(relation)

        if entities and relations:
            paths.append(Path(nodes=entities, edges=relations))

    return paths
1538
+
1539
+ # =========================================================================
1540
+ # Helper methods
1541
+ # =========================================================================
1542
+
1543
def _serialize_embedding(self, embedding) -> Optional[bytes]:
    """Serialize numpy array or list to float32 bytes

    Args:
        embedding: numpy array, list, tuple or any other array-like of
            numbers; ``None`` means "no embedding".

    Returns:
        The values as raw float32 bytes, or ``None`` when ``embedding`` is
        ``None``.
    """
    if embedding is None:
        return None
    # Always normalise to float32: _deserialize_embedding decodes the blob as
    # float32, so an ndarray of another dtype (float64 is numpy's default)
    # must be converted rather than dumped in its own layout. asarray leaves
    # an array that is already float32 untouched.
    return np.asarray(embedding, dtype=np.float32).tobytes()
1558
+
1559
def _deserialize_embedding(self, data: bytes) -> Optional[np.ndarray]:
    """Decode a stored embedding blob into a float32 numpy array

    Args:
        data: Raw bytes holding float32 values.

    Returns:
        An array interpreting ``data`` as float32, or ``None`` when ``data``
        is falsy (``None`` or empty).
    """
    # An empty or missing payload means no embedding was stored.
    return np.frombuffer(data, dtype=np.float32) if data else None