aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,495 @@
1
+ """
2
+ Streaming Support for Graph Storage
3
+
4
+ Provides streaming export and import capabilities for large graphs,
5
+ enabling efficient handling of millions of entities and relations.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ from typing import AsyncIterator, Optional, Dict, Any
11
+ from enum import Enum
12
+ import gzip
13
+ from pathlib import Path
14
+ from datetime import datetime
15
+
16
+ from aiecs.domain.knowledge_graph.models.entity import Entity
17
+ from aiecs.domain.knowledge_graph.models.relation import Relation
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``datetime`` values as ISO 8601 strings.

    Any other object the base encoder cannot handle is passed on to
    ``json.JSONEncoder.default``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Args:
            obj: Object the standard encoder could not serialize.

        Returns:
            ``obj.isoformat()`` when *obj* is a ``datetime``; otherwise
            whatever the base class ``default`` returns.
        """
        # Guard clause: everything that is not a datetime goes to the base class.
        if not isinstance(obj, datetime):
            return super().default(obj)
        return obj.isoformat()
29
+
30
+
31
class StreamFormat(str, Enum):
    """File formats selectable for streaming export and import.

    The class mixes in ``str``, so each member compares equal to its
    string value (for example ``StreamFormat.JSONL == "jsonl"``).
    """

    # JSON Lines: one JSON object per line.
    JSONL = "jsonl"
    # Standard JSON array.
    JSON = "json"
    # CSV format.
    CSV = "csv"
37
+
38
+
39
class GraphStreamExporter:
    """
    Stream large graphs to files

    Exports entities and relations one record at a time, pulling them from
    the store page by page when the store supports pagination, so the whole
    graph does not have to be held in memory at once.

    Example:
        ```python
        exporter = GraphStreamExporter(store)

        # Export to JSONL (recommended for large graphs)
        await exporter.export_to_file(
            "graph_export.jsonl",
            format=StreamFormat.JSONL,
            compress=True
        )

        # Export with filtering
        await exporter.export_entities(
            "persons.jsonl",
            entity_type="Person",
            batch_size=1000
        )
        ```
    """

    def __init__(self, store: Any):
        """
        Initialize exporter

        Args:
            store: Graph store instance
        """
        self.store = store

    @staticmethod
    def _open_output(filepath: str, compress: bool):
        """Open *filepath* for UTF-8 text writing, gzip-compressed if requested."""
        if compress:
            return gzip.open(filepath, "wt", encoding="utf-8")
        return open(filepath, "w", encoding="utf-8")

    async def export_to_file(
        self,
        filepath: str,
        format: StreamFormat = StreamFormat.JSONL,
        compress: bool = False,
        batch_size: int = 1000,
        entity_type: Optional[str] = None,
        include_relations: bool = True,
    ) -> Dict[str, Any]:
        """
        Export entire graph to file

        Args:
            filepath: Output file path. When ``compress`` is true and the
                path does not already end in ``.gz``, ``.gz`` is appended.
            format: Export format. Only ``StreamFormat.JSONL`` and
                ``StreamFormat.JSON`` are implemented.
            compress: Enable gzip compression
            batch_size: Page size requested from the store while streaming
            entity_type: Filter entities by type
            include_relations: Include relations in export

        Returns:
            Dictionary with export statistics: ``entity_count``,
            ``relation_count``, the final ``filepath`` and ``compressed``.

        Raises:
            ValueError: If ``format`` is not an implemented format. Raised
                before the output file is created, so no empty file is
                left behind.

        Example:
            ```python
            stats = await exporter.export_to_file(
                "graph.jsonl.gz",
                format=StreamFormat.JSONL,
                compress=True,
                batch_size=5000
            )
            print(f"Exported {stats['entity_count']} entities")
            ```
        """
        # Fail loudly instead of writing an empty file and reporting success.
        if format not in (StreamFormat.JSONL, StreamFormat.JSON):
            raise ValueError(f"Unsupported export format: {format!r}")

        # Add .gz extension if compressing
        if compress and not filepath.endswith(".gz"):
            filepath = f"{filepath}.gz"

        as_json = format == StreamFormat.JSON
        entity_count = 0
        relation_count = 0

        with self._open_output(filepath, compress) as file:
            # JSON output is one document: {"entities": [...], "relations": [...]}
            if as_json:
                file.write('{"entities": [')

            async for entity in self.stream_entities(
                entity_type=entity_type, batch_size=batch_size
            ):
                if as_json:
                    # A non-zero count means an element was already written.
                    if entity_count:
                        file.write(",")
                    json.dump(entity.model_dump(), file, cls=DateTimeEncoder)
                else:
                    json.dump(
                        {"type": "entity", "data": entity.model_dump()},
                        file,
                        cls=DateTimeEncoder,
                    )
                    file.write("\n")

                entity_count += 1

                # Log progress
                if entity_count % 10000 == 0:
                    logger.info("Exported %d entities...", entity_count)

            if include_relations:
                if as_json:
                    file.write('], "relations": [')

                async for relation in self.stream_relations(batch_size=batch_size):
                    if as_json:
                        if relation_count:
                            file.write(",")
                        json.dump(relation.model_dump(), file, cls=DateTimeEncoder)
                    else:
                        json.dump(
                            {"type": "relation", "data": relation.model_dump()},
                            file,
                            cls=DateTimeEncoder,
                        )
                        file.write("\n")

                    relation_count += 1

                    if relation_count % 10000 == 0:
                        logger.info("Exported %d relations...", relation_count)

            # Close whichever array is currently open, then the document.
            if as_json:
                file.write("]}")

        logger.info(
            "Export complete: %d entities, %d relations",
            entity_count,
            relation_count,
        )

        return {
            "entity_count": entity_count,
            "relation_count": relation_count,
            "filepath": filepath,
            "compressed": compress,
        }

    async def stream_entities(
        self, entity_type: Optional[str] = None, batch_size: int = 1000
    ) -> AsyncIterator[Entity]:
        """
        Stream entities in batches

        Uses ``store.paginate_entities`` when the store provides it;
        otherwise falls back to ``store.get_all_entities``, which loads
        every matching entity before yielding.

        Args:
            entity_type: Filter by entity type
            batch_size: Page size passed to the store's paginator

        Yields:
            Entity instances
        """
        if not hasattr(self.store, "paginate_entities"):
            # Fallback: load all and yield
            for entity in await self.store.get_all_entities(entity_type=entity_type):
                yield entity
            return

        cursor = None
        while True:
            page = await self.store.paginate_entities(
                entity_type=entity_type,
                page_size=batch_size,
                cursor=cursor,
            )

            for entity in page.items:
                yield entity

            if not page.page_info.has_next_page:
                break

            cursor = page.page_info.end_cursor

    async def stream_relations(
        self, relation_type: Optional[str] = None, batch_size: int = 1000
    ) -> AsyncIterator[Relation]:
        """
        Stream relations in batches

        Requires ``store.paginate_relations``. When the store does not
        provide it, a warning is logged and nothing is yielded.

        Args:
            relation_type: Filter by relation type
            batch_size: Page size passed to the store's paginator

        Yields:
            Relation instances
        """
        if not hasattr(self.store, "paginate_relations"):
            # No fallback exists: backends must implement paginate_relations.
            logger.warning(
                "Store does not implement paginate_relations; "
                "no relations will be streamed"
            )
            return

        cursor = None
        while True:
            page = await self.store.paginate_relations(
                relation_type=relation_type,
                page_size=batch_size,
                cursor=cursor,
            )

            for relation in page.items:
                yield relation

            if not page.page_info.has_next_page:
                break

            cursor = page.page_info.end_cursor

    async def export_entities(
        self,
        filepath: str,
        entity_type: Optional[str] = None,
        batch_size: int = 1000,
        compress: bool = False,
    ) -> int:
        """
        Export only entities to file

        Writes one bare ``entity.model_dump()`` JSON object per line (no
        ``{"type": ..., "data": ...}`` wrapper). Unlike ``export_to_file``,
        the path is used exactly as given: ``.gz`` is not appended.

        Args:
            filepath: Output file path
            entity_type: Filter by entity type
            batch_size: Streaming batch size
            compress: Enable gzip compression

        Returns:
            Number of entities exported
        """
        count = 0
        with self._open_output(filepath, compress) as file:
            async for entity in self.stream_entities(
                entity_type=entity_type, batch_size=batch_size
            ):
                json.dump(entity.model_dump(), file, cls=DateTimeEncoder)
                file.write("\n")
                count += 1

        return count
308
+
309
+
310
class GraphStreamImporter:
    """
    Stream large graphs from files

    Imports entities and relations in batches of ``batch_size`` records.

    Example:
        ```python
        importer = GraphStreamImporter(store)

        # Import from JSONL file
        stats = await importer.import_from_file(
            "graph_export.jsonl.gz",
            batch_size=1000
        )
        print(f"Imported {stats['entity_count']} entities")
        ```
    """

    def __init__(self, store: Any):
        """
        Initialize importer

        Args:
            store: Graph store instance
        """
        self.store = store

    @staticmethod
    def _iter_records(file, format):
        """Yield ``(kind, payload)`` pairs read from an open text file.

        ``kind`` is ``"entity"`` or ``"relation"``; ``payload`` is the dict
        of constructor keyword arguments for the matching model.
        """
        if format == StreamFormat.JSON:
            # The stdlib has no incremental JSON parser, so the document
            # written by the exporter is parsed in one go.
            document = json.load(file)
            for payload in document.get("entities", []):
                yield "entity", payload
            for payload in document.get("relations", []):
                yield "relation", payload
            return

        for line in file:
            if not line.strip():
                continue

            record = json.loads(line)
            record_type = record.get("type")
            if record_type == "entity":
                yield "entity", record["data"]
            elif record_type == "relation":
                yield "relation", record["data"]
            else:
                # Assume entity if no type specified
                yield "entity", record

    async def import_from_file(
        self,
        filepath: str,
        batch_size: int = 1000,
        format: StreamFormat = StreamFormat.JSONL,
    ) -> Dict[str, int]:
        """
        Import graph from file

        Files whose path ends in ``.gz`` are read through gzip. JSONL input
        is processed line by line; JSON input (the
        ``{"entities": [...], "relations": [...]}`` document produced by
        ``GraphStreamExporter``) is parsed in full before being imported.

        Args:
            filepath: Input file path
            batch_size: Batch size for bulk operations
            format: File format (``StreamFormat.JSONL`` or
                ``StreamFormat.JSON``)

        Returns:
            Dictionary with import statistics (``entity_count`` and
            ``relation_count``)

        Raises:
            ValueError: If ``format`` is not an implemented format.
        """
        # Fail loudly instead of reporting an empty import.
        if format not in (StreamFormat.JSONL, StreamFormat.JSON):
            raise ValueError(f"Unsupported import format: {format!r}")

        # Detect compression
        opener = gzip.open if filepath.endswith(".gz") else open

        entity_count = 0
        relation_count = 0

        entity_batch: list[Entity] = []
        relation_batch: list[Relation] = []

        with opener(filepath, "rt", encoding="utf-8") as file:
            for kind, payload in self._iter_records(file, format):
                if kind == "relation":
                    relation_batch.append(Relation(**payload))
                else:
                    entity_batch.append(Entity(**payload))

                if len(entity_batch) >= batch_size:
                    await self._import_entity_batch(entity_batch)
                    entity_count += len(entity_batch)
                    # Rebind rather than clear(): the store may still hold
                    # a reference to the list it was just given.
                    entity_batch = []
                    logger.info("Imported %d entities...", entity_count)

                if len(relation_batch) >= batch_size:
                    # Relations may refer to entities that are still
                    # buffered; store those first so endpoints exist.
                    if entity_batch:
                        await self._import_entity_batch(entity_batch)
                        entity_count += len(entity_batch)
                        entity_batch = []

                    await self._import_relation_batch(relation_batch)
                    relation_count += len(relation_batch)
                    relation_batch = []
                    logger.info("Imported %d relations...", relation_count)

            # Flush remaining batches (entities before relations)
            if entity_batch:
                await self._import_entity_batch(entity_batch)
                entity_count += len(entity_batch)

            if relation_batch:
                await self._import_relation_batch(relation_batch)
                relation_count += len(relation_batch)

        logger.info(
            "Import complete: %d entities, %d relations",
            entity_count,
            relation_count,
        )

        return {"entity_count": entity_count, "relation_count": relation_count}

    async def _import_entity_batch(self, entities: list[Entity]) -> None:
        """Import a batch of entities, using the store's bulk API if present"""
        if hasattr(self.store, "batch_add_entities"):
            await self.store.batch_add_entities(entities)
        else:
            for entity in entities:
                await self.store.add_entity(entity)

    async def _import_relation_batch(self, relations: list[Relation]) -> None:
        """Import a batch of relations, using the store's bulk API if present"""
        if hasattr(self.store, "batch_add_relations"):
            await self.store.batch_add_relations(relations)
        else:
            for relation in relations:
                await self.store.add_relation(relation)
430
+
431
+
432
async def stream_subgraph(
    store: Any,
    entity_ids: list[str],
    max_depth: int = 2,
    batch_size: int = 100,
) -> "AsyncIterator[tuple[Entity, list[Relation]]]":
    """
    Stream a subgraph around specific entities

    Walks the graph breadth-first from ``entity_ids``, one level per depth
    step, and yields every entity the store can resolve exactly once. The
    caller's ``entity_ids`` list is not modified.

    Args:
        store: Graph store instance; must provide awaitable ``get_entity``
            and ``get_neighbors`` methods
        entity_ids: Starting entity IDs
        max_depth: Maximum depth to traverse (the starting entities are
            depth 0, so ``max_depth + 1`` levels are visited at most)
        batch_size: Size of the slices each level is processed in

    Yields:
        Tuples of (entity, relations) for each entity in subgraph. The
        relations list is currently always empty; fetching the actual
        relations is not implemented yet.

    Example:
        ```python
        async for entity, relations in stream_subgraph(store, ["person_1"], max_depth=2):
            print(f"Entity: {entity.id}, Relations: {len(relations)}")
        ```
    """
    visited: set[str] = set()
    # Copy so the traversal never mutates the caller's list.
    current_level = list(entity_ids)
    depth = 0

    while current_level and depth <= max_depth:
        # Ids discovered at this depth; they become the next frontier.
        next_level: list[str] = []

        # Process current level in batches
        for start in range(0, len(current_level), batch_size):
            for entity_id in current_level[start : start + batch_size]:
                if entity_id in visited:
                    continue

                visited.add(entity_id)

                # Get entity
                entity = await store.get_entity(entity_id)
                if not entity:
                    continue

                neighbors = await store.get_neighbors(entity_id, direction="both")
                # For now, return empty relations list - would need to fetch
                # actual relations
                relations: list = []

                # Collect next level
                next_level.extend(
                    neighbor.id for neighbor in neighbors if neighbor.id not in visited
                )

                yield (entity, relations)

        current_level = next_level
        depth += 1
@@ -0,0 +1,13 @@
1
+ """Infrastructure messaging module
2
+
3
+ Contains messaging and communication infrastructure.
4
+ """
5
+
6
+ from .celery_task_manager import CeleryTaskManager
7
+ from .websocket_manager import WebSocketManager, UserConfirmation
8
+
9
+ __all__ = [
10
+ "CeleryTaskManager",
11
+ "WebSocketManager",
12
+ "UserConfirmation",
13
+ ]