aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,356 @@
1
+ """
2
+ Graph Builder - Main Pipeline Orchestrator
3
+
4
+ Orchestrates the full document-to-graph conversion pipeline.
5
+ """
6
+
7
+ import asyncio
8
+ from typing import List, Optional, Dict, Any, Callable
9
+ from dataclasses import dataclass, field
10
+ from datetime import datetime
11
+
12
+ from aiecs.domain.knowledge_graph.schema.graph_schema import GraphSchema
13
+ from aiecs.infrastructure.graph_storage.base import GraphStore
14
+ from aiecs.application.knowledge_graph.extractors.base import (
15
+ EntityExtractor,
16
+ RelationExtractor,
17
+ )
18
+ from aiecs.application.knowledge_graph.fusion.entity_deduplicator import (
19
+ EntityDeduplicator,
20
+ )
21
+ from aiecs.application.knowledge_graph.fusion.entity_linker import EntityLinker
22
+ from aiecs.application.knowledge_graph.fusion.relation_deduplicator import (
23
+ RelationDeduplicator,
24
+ )
25
+ from aiecs.application.knowledge_graph.validators.relation_validator import (
26
+ RelationValidator,
27
+ )
28
+
29
+
30
@dataclass
class BuildResult:
    """
    Outcome and statistics of a single graph building run.

    Every field has a default, so an empty ``BuildResult()`` describes a
    successful run that did nothing; callers fill in counters, messages and
    timing as the pipeline progresses.

    Attributes:
        success: False once the build has failed, True otherwise
        entities_added: Count of entities written to the graph
        relations_added: Count of relations written to the graph
        entities_linked: Count of entities matched to already-existing entities
        entities_deduplicated: Count of entities removed by deduplication
        relations_deduplicated: Count of relations removed by deduplication
        errors: Error messages collected during the build
        warnings: Warning messages collected during the build
        metadata: Free-form extra information about the build
        start_time: Timestamp taken when the build started
        end_time: Timestamp taken when the build ended
        duration_seconds: Elapsed time of the build, in seconds
    """

    # Overall status flag
    success: bool = True

    # Counters
    entities_added: int = 0
    relations_added: int = 0
    entities_linked: int = 0
    entities_deduplicated: int = 0
    relations_deduplicated: int = 0

    # Diagnostics; default_factory gives each instance its own containers
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Timing
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    duration_seconds: float = 0.0
62
+
63
+
64
class GraphBuilder:
    """
    Main pipeline for building knowledge graphs from text

    The pipeline:
    1. Extract entities from text
    2. Deduplicate entities
    3. Link entities to existing graph
    4. Extract relations between entities
    5. Validate relations
    6. Deduplicate relations
    7. Store entities and relations in graph

    Features:
    - Async/parallel processing
    - Progress callbacks
    - Error handling and recovery
    - Provenance tracking
    - Configurable components

    Example:
    ```python
    # Initialize components
    entity_extractor = LLMEntityExtractor(schema)
    relation_extractor = LLMRelationExtractor(schema)

    # Create builder
    builder = GraphBuilder(
        graph_store=store,
        entity_extractor=entity_extractor,
        relation_extractor=relation_extractor,
        schema=schema
    )

    # Build graph from text
    result = await builder.build_from_text(
        text="Alice works at Tech Corp.",
        source="document_1.pdf"
    )

    print(f"Added {result.entities_added} entities, {result.relations_added} relations")
    ```
    """

    def __init__(
        self,
        graph_store: GraphStore,
        entity_extractor: EntityExtractor,
        relation_extractor: RelationExtractor,
        schema: Optional[GraphSchema] = None,
        enable_deduplication: bool = True,
        enable_linking: bool = True,
        enable_validation: bool = True,
        progress_callback: Optional[Callable[[str, float], None]] = None,
    ):
        """
        Initialize graph builder

        Args:
            graph_store: Graph storage to save entities/relations
            entity_extractor: Entity extractor to use
            relation_extractor: Relation extractor to use
            schema: Optional schema for validation
            enable_deduplication: Enable entity/relation deduplication
            enable_linking: Enable linking to existing entities
            enable_validation: Enable relation validation
            progress_callback: Optional callback for progress updates (message, progress_pct)
        """
        self.graph_store = graph_store
        self.entity_extractor = entity_extractor
        self.relation_extractor = relation_extractor
        self.schema = schema
        self.enable_deduplication = enable_deduplication
        self.enable_linking = enable_linking
        self.enable_validation = enable_validation
        self.progress_callback = progress_callback

        # Fusion components are only constructed for the stages that are
        # switched on; a disabled stage keeps None and is skipped at build time.
        self.entity_deduplicator = EntityDeduplicator() if enable_deduplication else None
        self.entity_linker = EntityLinker(graph_store) if enable_linking else None
        self.relation_deduplicator = RelationDeduplicator() if enable_deduplication else None
        # Validation needs a schema; without one the validator stays None.
        self.relation_validator = (
            RelationValidator(schema) if enable_validation and schema else None
        )

    async def build_from_text(
        self,
        text: str,
        source: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> BuildResult:
        """
        Build knowledge graph from text

        Any exception raised inside the pipeline is caught and recorded on the
        returned result (``success=False`` plus an entry in ``errors``) rather
        than propagated to the caller.

        Args:
            text: Input text to process
            source: Optional source identifier (document name, URL, etc.)
            metadata: Optional metadata to attach to entities/relations

        Returns:
            BuildResult with statistics and errors
        """
        result = BuildResult(start_time=datetime.now())

        try:
            self._report_progress("Starting entity extraction", 0.1)

            # Step 1: Extract entities
            entities = await self.entity_extractor.extract_entities(text)

            if not entities:
                result.warnings.append("No entities extracted from text")
                return self._finalize_result(result)

            self._report_progress(f"Extracted {len(entities)} entities", 0.2)

            # Step 2: Deduplicate entities (within this text)
            if self.enable_deduplication and self.entity_deduplicator:
                original_count = len(entities)
                entities = await self.entity_deduplicator.deduplicate(entities)
                result.entities_deduplicated = original_count - len(entities)
                self._report_progress(f"Deduplicated to {len(entities)} entities", 0.3)

            # Step 3: Link entities to existing graph
            linked_entities = []
            new_entities = []

            if self.enable_linking and self.entity_linker:
                self._report_progress("Linking entities to graph", 0.4)
                link_results = await self.entity_linker.link_entities(entities)

                for link_result in link_results:
                    if link_result.linked:
                        linked_entities.append(link_result.existing_entity)
                        result.entities_linked += 1
                    else:
                        new_entities.append(link_result.new_entity)
            else:
                new_entities = entities

            # Combine linked and new entities for relation extraction
            all_entities = linked_entities + new_entities

            # Step 4: Extract relations
            if len(all_entities) >= 2:
                self._report_progress(
                    f"Extracting relations from {len(all_entities)} entities",
                    0.5,
                )
                relations = await self.relation_extractor.extract_relations(text, all_entities)
                self._report_progress(f"Extracted {len(relations)} relations", 0.6)
            else:
                relations = []
                result.warnings.append("Not enough entities for relation extraction")

            # Step 5: Validate relations
            valid_relations = relations
            if self.enable_validation and self.relation_validator and relations:
                self._report_progress("Validating relations", 0.7)
                valid_relations = self.relation_validator.filter_valid_relations(
                    relations, all_entities
                )
                invalid_count = len(relations) - len(valid_relations)
                if invalid_count > 0:
                    result.warnings.append(f"{invalid_count} relations failed validation")

            # Step 6: Deduplicate relations
            if self.enable_deduplication and self.relation_deduplicator and valid_relations:
                original_count = len(valid_relations)
                valid_relations = await self.relation_deduplicator.deduplicate(valid_relations)
                result.relations_deduplicated = original_count - len(valid_relations)
                self._report_progress(f"Deduplicated to {len(valid_relations)} relations", 0.8)

            # Step 7: Store in graph
            self._report_progress("Storing entities and relations in graph", 0.9)

            # Add provenance metadata (only newly created entities are tagged;
            # linked entities already live in the graph and are left untouched)
            if source or metadata:
                provenance = {"source": source} if source else {}
                if metadata:
                    provenance.update(metadata)

                self._attach_provenance(new_entities, provenance)
                self._attach_provenance(valid_relations, provenance)

            # Store entities
            for entity in new_entities:
                await self.graph_store.add_entity(entity)
                result.entities_added += 1

            # Store relations
            for relation in valid_relations:
                await self.graph_store.add_relation(relation)
                result.relations_added += 1

            self._report_progress("Build complete", 1.0)

        except Exception as e:
            # Top-level pipeline boundary: report the failure on the result
            # instead of raising, so batch callers always get a BuildResult.
            result.success = False
            result.errors.append(f"Build failed: {str(e)}")

        return self._finalize_result(result)

    async def build_batch(
        self,
        texts: List[str],
        sources: Optional[List[str]] = None,
        parallel: bool = True,
        max_parallel: int = 5,
    ) -> List[BuildResult]:
        """
        Build graph from multiple texts in batch

        Args:
            texts: List of texts to process
            sources: Optional list of source identifiers (same length as texts)
            parallel: Process in parallel (default: True)
            max_parallel: Maximum parallel tasks (default: 5)

        Returns:
            List of BuildResult objects (one per text)

        Raises:
            ValueError: If ``sources`` is given with a different length than
                ``texts``, or if ``parallel`` is True and ``max_parallel`` is
                less than 1.
        """
        if sources and len(sources) != len(texts):
            raise ValueError("sources list must match texts list length")

        # Nothing to do: skip source generation and concurrency setup.
        if not texts:
            return []

        if not sources:
            sources = [f"text_{i}" for i in range(len(texts))]

        if not parallel:
            # Process sequentially
            sequential_results = []
            for text, source in zip(texts, sources):
                sequential_results.append(await self.build_from_text(text, source))
            return sequential_results

        # A semaphore created with 0 permits would make every worker wait
        # forever, so reject non-positive limits up front.
        if max_parallel < 1:
            raise ValueError("max_parallel must be at least 1")

        # Process in parallel with semaphore for concurrency control
        semaphore = asyncio.Semaphore(max_parallel)

        async def process_one(text, source):
            async with semaphore:
                return await self.build_from_text(text, source)

        outcomes = await asyncio.gather(
            *(process_one(text, source) for text, source in zip(texts, sources)),
            return_exceptions=True,
        )

        # Convert anything that escaped build_from_text into a failed result.
        # BaseException is checked (not just Exception) because a cancelled
        # worker is returned as CancelledError, which is not an Exception.
        results = []
        for outcome in outcomes:
            if isinstance(outcome, BaseException):
                error_result = BuildResult(success=False)
                # Some exceptions stringify to "", fall back to the type name.
                error_result.errors.append(str(outcome) or type(outcome).__name__)
                results.append(error_result)
            else:
                results.append(outcome)

        return results

    @staticmethod
    def _attach_provenance(items: List[Any], provenance: Dict[str, Any]) -> None:
        """
        Store provenance under the ``_provenance`` key of each item's properties

        Each item receives its own shallow copy of ``provenance`` so that a
        later mutation on one entity/relation does not leak into the others.

        Args:
            items: Entities or relations exposing a ``properties`` attribute
            provenance: Provenance mapping to attach
        """
        for item in items:
            if not item.properties:
                item.properties = {}
            item.properties["_provenance"] = dict(provenance)

    def _report_progress(self, message: str, progress: float):
        """
        Report progress via callback

        Args:
            message: Progress message
            progress: Progress percentage (0.0-1.0)
        """
        if self.progress_callback:
            try:
                self.progress_callback(message, progress)
            except Exception as e:
                # Don't let callback errors break the pipeline
                print(f"Warning: Progress callback error: {e}")

    def _finalize_result(self, result: BuildResult) -> BuildResult:
        """
        Finalize build result with timing information

        Sets ``end_time`` to the current time and, when ``start_time`` is
        set, computes ``duration_seconds`` from the two timestamps.

        Args:
            result: BuildResult to finalize

        Returns:
            Finalized BuildResult
        """
        result.end_time = datetime.now()
        if result.start_time:
            result.duration_seconds = (result.end_time - result.start_time).total_seconds()
        return result