aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,449 @@
1
+ """
2
+ Batch Operations for Graph Storage
3
+
4
+ Provides efficient batch operations for bulk inserts, updates, and deletes.
5
+ Uses PostgreSQL COPY and multi-row INSERT for optimal performance.
6
+ """
7
+
8
+ import asyncpg
9
+ import logging
10
+ import io
11
+ from typing import List
12
+ import json
13
+
14
+ from aiecs.domain.knowledge_graph.models.entity import Entity
15
+ from aiecs.domain.knowledge_graph.models.relation import Relation
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class BatchOperationsMixin:
    """
    Mixin providing batch operations for graph stores

    This mixin adds efficient batch insert/update/delete methods
    using PostgreSQL-specific optimizations like COPY and multi-row INSERT.

    The host class must provide:
        - ``self.pool``: a connection pool whose ``acquire()`` is usable as an
          async context manager yielding an asyncpg-style connection.
        - ``self._serialize_embedding(embedding)``: returns the embedding as
          ``bytes`` (only needed when entities carry embeddings).

    Example:
        ```python
        class MyGraphStore(GraphStore, BatchOperationsMixin):
            pass

        store = MyGraphStore()
        await store.batch_add_entities([entity1, entity2, ...], batch_size=1000)
        ```
    """

    # asyncpg refuses statements with more than 32767 bind arguments, so
    # multi-row INSERTs are split to stay under this limit.
    _MAX_QUERY_ARGS = 32767

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _copy_field(value) -> str:
        r"""
        Render one value as a field of PostgreSQL COPY text format

        ``None`` becomes the NULL marker ``\N``. Backslash, tab, newline and
        carriage return are backslash-escaped so they cannot be mistaken for
        escape sequences, column delimiters or row terminators.
        """
        if value is None:
            return "\\N"
        # f-string rendering matches how the values were formatted before
        # escaping was introduced (relevant for str-based enums).
        text = f"{value}"
        return (
            text.replace("\\", "\\\\")  # must run first
            .replace("\t", "\\t")
            .replace("\n", "\\n")
            .replace("\r", "\\r")
        )

    @staticmethod
    def _copy_bytea(blob) -> str:
        r"""
        Render bytes as a bytea field of COPY text format

        Empty/missing data becomes NULL. Otherwise the bytea hex input form
        ``\x<hex>`` is emitted with its backslash doubled, as COPY text
        format requires.
        NOTE(review): assumes the ``embedding`` column is ``bytea`` (the
        INSERT path binds the same bytes object directly) - confirm.
        """
        if not blob:
            return "\\N"
        return "\\\\x" + blob.hex()

    @staticmethod
    def _chunks(items, size: int) -> list:
        """
        Split ``items`` into consecutive slices of at most ``size`` elements

        Raises:
            ValueError: If ``size`` is smaller than 1 (a negative step would
                otherwise silently produce no batches at all).
        """
        if size < 1:
            raise ValueError(f"batch size must be a positive integer, got {size}")
        return [items[start : start + size] for start in range(0, len(items), size)]

    @staticmethod
    def _status_row_count(status, command: str, default: int) -> int:
        """
        Extract the row count from a command status such as ``"COPY 42"``

        Returns ``default`` when ``status`` is empty or does not start with
        ``command``.
        """
        if status and status.startswith(command):
            return int(status.split()[1])
        return default

    def _require_pool(self) -> None:
        """Raise RuntimeError unless the host store has a usable ``pool``."""
        if not getattr(self, "pool", None):
            raise RuntimeError("GraphStore not initialized")

    def _embedding_blob(self, entity):
        """
        Return the serialized embedding of ``entity`` or ``None``

        A missing, ``None`` or empty embedding yields ``None``. Emptiness is
        checked with ``len()`` so array-like embeddings (whose truth value is
        ambiguous) do not raise.
        """
        embedding = getattr(entity, "embedding", None)
        if embedding is None or len(embedding) == 0:
            return None
        return self._serialize_embedding(embedding)

    async def _delete_by_ids(self, table: str, ids: List[str], batch_size: int) -> int:
        """
        Delete rows of ``table`` whose id is in ``ids``, ``batch_size`` at a time

        ``table`` is only ever one of the fixed table names used by this
        mixin; it is never taken from caller input.
        """
        batches = self._chunks(ids, batch_size)
        # ANY($1) binds the whole batch as a single array parameter
        query = f"DELETE FROM {table} WHERE id = ANY($1)"

        deleted = 0
        async with self.pool.acquire() as conn:
            for batch in batches:
                status = await conn.execute(query, batch)
                # Status format: "DELETE n"
                deleted += self._status_row_count(status, "DELETE", 0)
        return deleted

    # ------------------------------------------------------------------
    # Entities
    # ------------------------------------------------------------------

    async def batch_add_entities(
        self,
        entities: List["Entity"],
        batch_size: int = 1000,
        use_copy: bool = True,
    ) -> int:
        """
        Add multiple entities efficiently

        Args:
            entities: List of entities to add
            batch_size: Number of entities per multi-row INSERT. Only used
                when ``use_copy`` is False; COPY sends all entities at once.
            use_copy: Use PostgreSQL COPY for better performance. If COPY
                fails, the entities are upserted with INSERT instead.

        Returns:
            Number of entities added

        Raises:
            RuntimeError: If the store has no connection pool
            ValueError: If ``batch_size`` < 1 and ``use_copy`` is False

        Example:
            ```python
            entities = [
                Entity(id="e1", entity_type="Person", properties={"name": "Alice"}),
                Entity(id="e2", entity_type="Person", properties={"name": "Bob"}),
                # ... thousands more
            ]
            count = await store.batch_add_entities(entities, batch_size=1000)
            ```
        """
        if not entities:
            return 0

        self._require_pool()

        if use_copy:
            # Use COPY for maximum performance
            total_added = await self._batch_add_entities_copy(entities)
        else:
            # Use multi-row INSERT, one statement per batch
            total_added = 0
            for batch in self._chunks(entities, batch_size):
                total_added += await self._batch_add_entities_insert(batch)

        logger.info("Batch added %s entities", total_added)
        return total_added

    async def _batch_add_entities_copy(self, entities: List["Entity"]) -> int:
        """
        Add entities using PostgreSQL COPY (fastest method)

        COPY cannot upsert, so any failure (duplicate ids included) falls
        back to ``_batch_add_entities_insert``. A failed COPY inserts nothing,
        which makes the retry safe.

        Args:
            entities: List of entities to add

        Returns:
            Number of entities added
        """
        if not entities:
            return 0

        # One line per entity: id \t entity_type \t properties \t embedding
        lines = [
            "\t".join(
                (
                    self._copy_field(entity.id),
                    self._copy_field(entity.entity_type),
                    # JSON text contains backslashes that COPY would otherwise
                    # interpret as escapes, hence the escaping pass.
                    self._copy_field(json.dumps(entity.properties)),
                    self._copy_bytea(self._embedding_blob(entity)),
                )
            )
            for entity in entities
        ]
        # asyncpg expects a byte source for copy_to_table
        payload = ("\n".join(lines) + "\n").encode("utf-8")

        async with self.pool.acquire() as conn:
            try:
                status = await conn.copy_to_table(
                    "graph_entities",
                    source=io.BytesIO(payload),
                    columns=["id", "entity_type", "properties", "embedding"],
                    format="text",
                )
            except asyncpg.UniqueViolationError as e:
                logger.warning("Duplicate entities in batch: %s", e)
            except Exception as e:
                # Deliberate best effort: any COPY failure is retried via INSERT
                logger.error("COPY failed: %s", e)
            else:
                # Status format: "COPY n" where n is the number of rows
                return self._status_row_count(status, "COPY", len(entities))

        # Fall back only after the COPY connection has been released, so a
        # small pool cannot dead-lock waiting for a second connection.
        return await self._batch_add_entities_insert(entities)

    async def _batch_add_entities_insert(self, entities: List["Entity"]) -> int:
        """
        Add entities using multi-row INSERT with ON CONFLICT

        Entities sharing an id are collapsed to the last occurrence, because
        PostgreSQL rejects an ON CONFLICT DO UPDATE statement that touches the
        same row twice. Large inputs are split into several statements to
        respect the bind-argument limit; each statement commits on its own.

        Args:
            entities: List of entities to add

        Returns:
            Number of entities added/updated (after de-duplication by id)

        Raises:
            Exception: Whatever the database driver raises; it is logged and
                re-raised unchanged.
        """
        if not entities:
            return 0

        columns_per_row = 4
        # Later duplicates win, mirroring what sequential upserts would do
        unique_entities = list({entity.id: entity for entity in entities}.values())
        rows = [
            (
                entity.id,
                entity.entity_type,
                json.dumps(entity.properties),
                self._embedding_blob(entity),
            )
            for entity in unique_entities
        ]

        async with self.pool.acquire() as conn:
            try:
                for chunk in self._chunks(rows, self._MAX_QUERY_ARGS // columns_per_row):
                    placeholders = ", ".join(
                        f"(${base + 1}, ${base + 2}, ${base + 3}::jsonb, ${base + 4})"
                        for base in range(0, len(chunk) * columns_per_row, columns_per_row)
                    )
                    query = f"""
                        INSERT INTO graph_entities (id, entity_type, properties, embedding)
                        VALUES {placeholders}
                        ON CONFLICT (id) DO UPDATE SET
                            entity_type = EXCLUDED.entity_type,
                            properties = EXCLUDED.properties,
                            embedding = EXCLUDED.embedding,
                            updated_at = CURRENT_TIMESTAMP
                    """
                    args = [value for row in chunk for value in row]
                    await conn.execute(query, *args)
            except Exception as e:
                logger.error("Batch insert failed: %s", e)
                raise

        return len(rows)

    # ------------------------------------------------------------------
    # Relations
    # ------------------------------------------------------------------

    async def batch_add_relations(
        self,
        relations: List["Relation"],
        batch_size: int = 1000,
        use_copy: bool = True,
    ) -> int:
        """
        Add multiple relations efficiently

        Args:
            relations: List of relations to add
            batch_size: Number of relations per multi-row INSERT. Only used
                when ``use_copy`` is False; COPY sends all relations at once.
            use_copy: Use PostgreSQL COPY for better performance. If COPY
                fails, the relations are upserted with INSERT instead.

        Returns:
            Number of relations added

        Raises:
            RuntimeError: If the store has no connection pool
            ValueError: If ``batch_size`` < 1 and ``use_copy`` is False

        Example:
            ```python
            relations = [
                Relation(id="r1", source_id="e1", target_id="e2", relation_type="KNOWS", properties={}),
                Relation(id="r2", source_id="e2", target_id="e3", relation_type="KNOWS", properties={}),
                # ... thousands more
            ]
            count = await store.batch_add_relations(relations, batch_size=1000)
            ```
        """
        if not relations:
            return 0

        self._require_pool()

        if use_copy:
            # Use COPY for maximum performance
            total_added = await self._batch_add_relations_copy(relations)
        else:
            # Use multi-row INSERT, one statement per batch
            total_added = 0
            for batch in self._chunks(relations, batch_size):
                total_added += await self._batch_add_relations_insert(batch)

        logger.info("Batch added %s relations", total_added)
        return total_added

    async def _batch_add_relations_copy(self, relations: List["Relation"]) -> int:
        """
        Add relations using PostgreSQL COPY

        Any COPY failure (duplicate ids, foreign-key violations, anything
        else) falls back to ``_batch_add_relations_insert``. A failed COPY
        inserts nothing, which makes the retry safe.

        Args:
            relations: List of relations to add

        Returns:
            Number of relations added
        """
        if not relations:
            return 0

        # One line per relation:
        # id \t relation_type \t source_id \t target_id \t properties \t weight
        lines = [
            "\t".join(
                (
                    self._copy_field(relation.id),
                    self._copy_field(relation.relation_type),
                    self._copy_field(relation.source_id),
                    self._copy_field(relation.target_id),
                    self._copy_field(json.dumps(relation.properties)),
                    # A None weight is written as NULL, not the text "None"
                    self._copy_field(relation.weight),
                )
            )
            for relation in relations
        ]
        # asyncpg expects a byte source for copy_to_table
        payload = ("\n".join(lines) + "\n").encode("utf-8")

        async with self.pool.acquire() as conn:
            try:
                status = await conn.copy_to_table(
                    "graph_relations",
                    source=io.BytesIO(payload),
                    columns=[
                        "id",
                        "relation_type",
                        "source_id",
                        "target_id",
                        "properties",
                        "weight",
                    ],
                    format="text",
                )
            except asyncpg.UniqueViolationError as e:
                logger.warning("Duplicate relations in batch: %s", e)
            except asyncpg.ForeignKeyViolationError as e:
                # Some referenced entities don't exist; the INSERT retry will
                # surface the same violation to the caller.
                logger.error("Foreign key violation in batch: %s", e)
            except Exception as e:
                # Deliberate best effort: any COPY failure is retried via INSERT
                logger.error("COPY failed: %s", e)
            else:
                # Status format: "COPY n" where n is the number of rows
                return self._status_row_count(status, "COPY", len(relations))

        # Fall back only after the COPY connection has been released, so a
        # small pool cannot dead-lock waiting for a second connection.
        return await self._batch_add_relations_insert(relations)

    async def _batch_add_relations_insert(self, relations: List["Relation"]) -> int:
        """
        Add relations using multi-row INSERT

        Relations sharing an id are collapsed to the last occurrence, because
        PostgreSQL rejects an ON CONFLICT DO UPDATE statement that touches the
        same row twice. Large inputs are split into several statements to
        respect the bind-argument limit; each statement commits on its own.

        Args:
            relations: List of relations to add

        Returns:
            Number of relations added/updated (after de-duplication by id)

        Raises:
            Exception: Whatever the database driver raises; it is logged and
                re-raised unchanged.
        """
        if not relations:
            return 0

        columns_per_row = 6
        # Later duplicates win, mirroring what sequential upserts would do
        unique_relations = list({relation.id: relation for relation in relations}.values())
        rows = [
            (
                relation.id,
                relation.relation_type,
                relation.source_id,
                relation.target_id,
                json.dumps(relation.properties),
                relation.weight,
            )
            for relation in unique_relations
        ]

        async with self.pool.acquire() as conn:
            try:
                for chunk in self._chunks(rows, self._MAX_QUERY_ARGS // columns_per_row):
                    placeholders = ", ".join(
                        f"(${base + 1}, ${base + 2}, ${base + 3}, ${base + 4}, "
                        f"${base + 5}::jsonb, ${base + 6})"
                        for base in range(0, len(chunk) * columns_per_row, columns_per_row)
                    )
                    query = f"""
                        INSERT INTO graph_relations (id, relation_type, source_id, target_id, properties, weight)
                        VALUES {placeholders}
                        ON CONFLICT (id) DO UPDATE SET
                            relation_type = EXCLUDED.relation_type,
                            source_id = EXCLUDED.source_id,
                            target_id = EXCLUDED.target_id,
                            properties = EXCLUDED.properties,
                            weight = EXCLUDED.weight,
                            updated_at = CURRENT_TIMESTAMP
                    """
                    args = [value for row in chunk for value in row]
                    await conn.execute(query, *args)
            except Exception as e:
                logger.error("Batch insert failed: %s", e)
                raise

        return len(rows)

    # ------------------------------------------------------------------
    # Deletes
    # ------------------------------------------------------------------

    async def batch_delete_entities(self, entity_ids: List[str], batch_size: int = 1000) -> int:
        """
        Delete multiple entities efficiently

        Args:
            entity_ids: List of entity IDs to delete
            batch_size: Number of entities per batch

        Returns:
            Number of entities deleted

        Raises:
            RuntimeError: If the store has no connection pool
            ValueError: If ``batch_size`` < 1
        """
        if not entity_ids:
            return 0

        self._require_pool()

        total_deleted = await self._delete_by_ids("graph_entities", entity_ids, batch_size)
        logger.info("Batch deleted %s entities", total_deleted)
        return total_deleted

    async def batch_delete_relations(self, relation_ids: List[str], batch_size: int = 1000) -> int:
        """
        Delete multiple relations efficiently

        Args:
            relation_ids: List of relation IDs to delete
            batch_size: Number of relations per batch

        Returns:
            Number of relations deleted

        Raises:
            RuntimeError: If the store has no connection pool
            ValueError: If ``batch_size`` < 1
        """
        if not relation_ids:
            return 0

        self._require_pool()

        total_deleted = await self._delete_by_ids("graph_relations", relation_ids, batch_size)
        logger.info("Batch deleted %s relations", total_deleted)
        return total_deleted
427
+
428
+
429
def estimate_batch_size(avg_item_size_bytes: int, target_batch_size_mb: int = 10) -> int:
    """
    Estimate optimal batch size based on item size

    The result is the number of average-sized items that fit into the target
    batch size, but never fewer than 100.

    Args:
        avg_item_size_bytes: Average size of each item in bytes. Zero or
            negative sizes carry no usable information and yield the
            minimum batch size of 100.
        target_batch_size_mb: Target batch size in MB

    Returns:
        Recommended batch size (number of items)

    Example:
        ```python
        # For entities averaging 1KB each
        batch_size = estimate_batch_size(1024, target_batch_size_mb=10)
        # Returns 10240
        ```
    """
    min_batch_size = 100

    # Guard against division by zero (and meaningless negative sizes)
    if avg_item_size_bytes <= 0:
        return min_batch_size

    target_bytes = target_batch_size_mb * 1024 * 1024
    # int() keeps the return type stable even if a float size is passed in
    return max(min_batch_size, int(target_bytes // avg_item_size_bytes))