aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,327 @@
1
+ """
2
+ LLM-based Entity Extractor
3
+
4
+ Extracts entities from text using Large Language Models (GPT-4, Gemini, etc.).
5
+ Uses AIECS's LLM client infrastructure for provider-agnostic extraction.
6
+ """
7
+
8
+ import json
9
+ import uuid
10
+ from typing import List, Optional, Dict, Any
11
+ from aiecs.application.knowledge_graph.extractors.base import EntityExtractor
12
+ from aiecs.domain.knowledge_graph.models.entity import Entity
13
+ from aiecs.domain.knowledge_graph.schema.graph_schema import GraphSchema
14
+ from aiecs.llm import get_llm_manager, AIProvider
15
+
16
+
17
class LLMEntityExtractor(EntityExtractor):
    """
    Extract entities using Large Language Models

    This extractor uses LLMs (like GPT-4, Gemini) to identify and extract entities
    from text. It's schema-aware and can extract custom entity types with properties.

    Features:
    - Schema-guided extraction (tells LLM what entity types to look for)
    - Property extraction (not just entity names, but also attributes)
    - Confidence scoring (LLM provides confidence for each entity; stored under
      the ``_extraction_confidence`` property of every returned entity)
    - Configurable LLM provider and model (passed through to the LLM manager)

    Example:
        ```python
        from aiecs.llm import AIProvider

        extractor = LLMEntityExtractor(
            schema=graph_schema,
            provider=AIProvider.VERTEX,
            model="gemini-pro"
        )

        entities = await extractor.extract_entities(
            "Alice, a 30-year-old data scientist, works at Tech Corp."
        )
        # Returns: [
        #   Entity(type="Person", properties={"name": "Alice", "age": 30, "occupation": "data scientist"}),
        #   Entity(type="Company", properties={"name": "Tech Corp"})
        # ]
        ```
    """

    def __init__(
        self,
        schema: Optional[GraphSchema] = None,
        provider: Optional[AIProvider] = None,
        model: Optional[str] = None,
        temperature: float = 0.1,  # Low temperature for more deterministic extraction
        max_tokens: Optional[int] = 2000,
    ):
        """
        Initialize LLM entity extractor

        Args:
            schema: Optional GraphSchema to guide extraction (provides entity types and properties)
            provider: LLM provider handed to the LLM manager; ``None`` leaves the
                choice to the manager's own configuration
            model: Specific model handed to the LLM manager; ``None`` leaves the
                choice to the manager's own configuration
            temperature: LLM temperature (0.1 = more deterministic, good for extraction)
            max_tokens: Maximum tokens in response
        """
        self.schema = schema
        self.provider = provider
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        # The manager is obtained with ``await`` and therefore cannot be created
        # here; it is fetched on the first call to ``extract_entities``.
        self._llm_manager = None

    async def extract_entities(
        self, text: str, entity_types: Optional[List[str]] = None, **kwargs
    ) -> List[Entity]:
        """
        Extract entities from text using LLM

        Args:
            text: Input text to extract entities from
            entity_types: Optional filter for specific entity types
            **kwargs: Accepted for interface compatibility; not used by this method

        Returns:
            List of extracted Entity objects (empty when the LLM reply is not
            parseable JSON)

        Raises:
            ValueError: If text is empty or whitespace only
            RuntimeError: If the LLM call or the conversion of its reply fails
        """
        if not text or not text.strip():
            raise ValueError("Input text cannot be empty")

        # Lazy-load LLM manager
        if self._llm_manager is None:
            self._llm_manager = await get_llm_manager()

        prompt = self._build_extraction_prompt(text, entity_types)

        try:
            response = await self._llm_manager.generate_text(
                messages=prompt,
                provider=self.provider,
                model=self.model,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )
            # Parsing stays inside the try block so that any failure while
            # building entities is reported as RuntimeError as well.
            return self._parse_llm_response(response.content)
        except Exception as e:
            raise RuntimeError(f"LLM entity extraction failed: {str(e)}") from e

    def _build_extraction_prompt(self, text: str, entity_types: Optional[List[str]] = None) -> str:
        """
        Build prompt for LLM entity extraction

        The prompt is structured to:
        1. Explain the task (entity extraction)
        2. Provide entity type schemas (if available)
        3. Request JSON output format
        4. Include the text to extract from

        Args:
            text: Input text
            entity_types: Optional filter for entity types

        Returns:
            Formatted prompt string
        """
        # Determine which entity types to extract
        if self.schema:
            available_types = self.schema.get_entity_type_names()
            if entity_types:
                # Filter to requested types that exist in schema
                types_to_extract = [t for t in entity_types if t in available_types]
            else:
                # Use all types from schema
                types_to_extract = available_types
        elif entity_types:
            # No schema, but user specified types
            types_to_extract = entity_types
        else:
            # No schema and no filter - use common types
            types_to_extract = [
                "Person",
                "Organization",
                "Location",
                "Event",
                "Product",
            ]

        # Build one bullet line per entity type
        type_descriptions = []
        for entity_type in types_to_extract:
            if self.schema and self.schema.has_entity_type(entity_type):
                # Use schema definition; the property list is only shown when the
                # schema type carries no description of its own.
                schema_type = self.schema.get_entity_type(entity_type)
                properties = list(schema_type.properties.keys()) if schema_type.properties else []
                prop_str = ", ".join(properties) if properties else "any relevant properties"
                desc = f"- {entity_type}: {schema_type.description or 'Extract properties: ' + prop_str}"
                type_descriptions.append(desc)
            else:
                # Generic description
                type_descriptions.append(
                    f"- {entity_type}: Extract name and any relevant properties"
                )

        types_description = "\n".join(type_descriptions)

        # NOTE(review): leading whitespace inside this template could not be
        # recovered from the diff rendering - confirm against the packaged file.
        prompt = f"""You are an expert at extracting structured entities from text.

Extract entities of the following types from the text:
{types_description}

For each entity, provide:
1. type: The entity type (one of the types listed above)
2. properties: A dictionary of properties (e.g., name, age, location, etc.)
3. confidence: Your confidence in this extraction (0.0 to 1.0)

Return ONLY a valid JSON array with this structure:
[
{{
"type": "EntityType",
"properties": {{"property1": "value1", "property2": "value2"}},
"confidence": 0.95
}}
]

Important:
- Extract ALL entities you find of the specified types
- Include as many relevant properties as you can find
- Use consistent property names (e.g., "name" not "title" or "full_name")
- If unsure about a property, omit it rather than guessing
- Confidence should reflect how certain you are about the entity and its properties

Text to extract from:
\"\"\"{text}\"\"\"

JSON output:"""

        return prompt

    def _parse_llm_response(self, response_text: str) -> List[Entity]:
        """
        Parse LLM response to Entity objects

        Expected JSON format:
        [
            {"type": "Person", "properties": {"name": "Alice", "age": 30}, "confidence": 0.95},
            {"type": "Company", "properties": {"name": "Tech Corp"}, "confidence": 0.90}
        ]

        A single JSON object is accepted and treated as a one-element list.
        Items that are not JSON objects are skipped, a missing/empty ``type``
        becomes ``"Unknown"`` and a ``properties`` value that is not an object
        is replaced by an empty dict, so one malformed item does not discard
        the rest of the batch.

        Args:
            response_text: LLM response string (should be JSON)

        Returns:
            List of Entity objects; empty if the response is not valid JSON
        """
        # Imported locally because the module's import block lies outside this
        # class (same approach as the local hashlib import further down).
        import logging

        logger = logging.getLogger(__name__)

        try:
            # Extract JSON from response (LLM might include extra text)
            json_str = self._extract_json_from_text(response_text)
            extracted_data = json.loads(json_str)
        except json.JSONDecodeError as e:
            # Best effort: report the problem but don't fail completely
            logger.warning("Failed to parse LLM response as JSON: %s", e)
            logger.warning("Response was: %s...", response_text[:200])
            return []

        if not isinstance(extracted_data, list):
            # Sometimes LLM returns single object instead of array
            extracted_data = [extracted_data]

        entities: List[Entity] = []
        for item in extracted_data:
            if not isinstance(item, dict):
                logger.warning("Skipping non-object item in LLM response: %r", item)
                continue

            entity_type = item.get("type") or "Unknown"
            properties = item.get("properties", {})
            if not isinstance(properties, dict):
                properties = {}
            confidence = item.get("confidence", 0.5)

            entity = Entity(
                id=self._generate_entity_id(entity_type, properties),
                entity_type=entity_type,
                properties=properties,
            )

            # Store confidence in properties for later use
            entity.properties["_extraction_confidence"] = confidence

            entities.append(entity)

        return entities

    def _extract_json_from_text(self, text: str) -> str:
        """
        Extract a JSON array or object from text (handles extra text around it)

        Two candidate spans are considered: first "[" to last "]" and first "{"
        to last "}". They are tried in order of appearance and the first one
        that parses as JSON is returned, so a single object that merely
        contains a list is not mistaken for an entity array. When neither
        candidate parses, the array span is preferred over the object span so
        the caller reports the decode error.

        Args:
            text: Response text that may contain JSON

        Returns:
            Extracted JSON string, or the original text if no span was found
        """
        candidates = []
        for opener, closer in (("[", "]"), ("{", "}")):
            start = text.find(opener)
            end = text.rfind(closer) + 1
            if start != -1 and end > start:
                candidates.append((start, end))

        if not candidates:
            # No JSON found, return original
            return text

        for start, end in sorted(candidates):
            snippet = text[start:end]
            try:
                json.loads(snippet)
            except json.JSONDecodeError:
                continue
            return snippet

        # Nothing parsed: fall back to the first candidate found
        # (array span when present, otherwise the object span).
        start, end = candidates[0]
        return text[start:end]

    def _generate_entity_id(self, entity_type: str, properties: Dict[str, Any]) -> str:
        """
        Generate an ID for an entity

        Uses entity type + key property ("name", then "title", then "id") to
        create a deterministic ID, with fallback to a random UUID fragment
        when none of those properties is set.

        Args:
            entity_type: Entity type name
            properties: Entity properties

        Returns:
            Entity ID string
        """
        # Try to use name for deterministic ID
        name = properties.get("name") or properties.get("title") or properties.get("id")

        if name:
            # Normalize to lowercase and replace spaces with underscores
            normalized = f"{entity_type}_{name}".lower().replace(" ", "_")
            # Short digest of the normalized string, appended as a suffix
            import hashlib

            hash_suffix = hashlib.md5(normalized.encode()).hexdigest()[:8]
            return f"{normalized}_{hash_suffix}"

        # No name property, use UUID
        return f"{entity_type.lower()}_{uuid.uuid4().hex[:12]}"
@@ -0,0 +1,349 @@
1
+ """
2
+ LLM-based Relation Extractor
3
+
4
+ Extracts relations between entities using Large Language Models.
5
+ """
6
+
7
+ import json
8
+ import uuid
9
+ from typing import List, Optional
10
+ from aiecs.application.knowledge_graph.extractors.base import RelationExtractor
11
+ from aiecs.domain.knowledge_graph.models.entity import Entity
12
+ from aiecs.domain.knowledge_graph.models.relation import Relation
13
+ from aiecs.domain.knowledge_graph.schema.graph_schema import GraphSchema
14
+ from aiecs.llm import get_llm_manager, AIProvider
15
+
16
+
17
class LLMRelationExtractor(RelationExtractor):
    """
    Extract relations between entities using LLMs

    Given text and a list of entities, identifies relationships between them.
    Uses LLMs to understand semantic relationships and extract structured relations.

    Features:
    - Schema-aware extraction (uses relation types from schema)
    - Entity-aware (only extracts relations between known entities)
    - Property extraction (relation properties/attributes)
    - Confidence scoring
    - Directional relation support

    Example:
    ```python
    extractor = LLMRelationExtractor(schema=graph_schema)

    alice = Entity(id="e1", type="Person", properties={"name": "Alice"})
    tech_corp = Entity(id="e2", type="Company", properties={"name": "Tech Corp"})

    relations = await extractor.extract_relations(
        text="Alice works as a senior engineer at Tech Corp.",
        entities=[alice, tech_corp]
    )
    # Returns: [
    #   Relation(
    #     source_id="e1",
    #     target_id="e2",
    #     relation_type="WORKS_FOR",
    #     properties={"title": "senior engineer"}
    #   )
    # ]
    ```
    """

    # Relation types offered to the LLM when neither a schema nor an explicit
    # relation_types filter is supplied
    _DEFAULT_RELATION_TYPES = (
        "WORKS_FOR",
        "LOCATED_IN",
        "PART_OF",
        "KNOWS",
        "OWNS",
        "MANAGES",
        "PRODUCES",
        "RELATED_TO",
    )

    # Confidence recorded when the LLM omits the field or returns an unusable value
    _DEFAULT_CONFIDENCE = 0.5

    def __init__(
        self,
        schema: Optional[GraphSchema] = None,
        provider: Optional[AIProvider] = None,
        model: Optional[str] = None,
        temperature: float = 0.1,
        max_tokens: Optional[int] = 2000,
    ):
        """
        Initialize LLM relation extractor

        Args:
            schema: Optional GraphSchema to guide extraction
            provider: LLM provider; forwarded as-is (including None) to the LLM manager
            model: Specific model to use; forwarded as-is to the LLM manager
            temperature: LLM temperature (low for deterministic extraction)
            max_tokens: Maximum tokens in response
        """
        self.schema = schema
        self.provider = provider
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self._llm_manager = None  # Lazy-loaded in async methods

    async def extract_relations(
        self,
        text: str,
        entities: List[Entity],
        relation_types: Optional[List[str]] = None,
        **kwargs,
    ) -> List[Relation]:
        """
        Extract relations from text given known entities

        Args:
            text: Input text containing entities
            entities: List of entities already extracted
            relation_types: Optional filter for specific relation types
            **kwargs: Additional parameters (accepted for interface
                compatibility; not used by this implementation)

        Returns:
            List of extracted Relation objects. Empty when fewer than two
            entities are supplied, since a relation needs two endpoints.

        Raises:
            ValueError: If text is empty or whitespace-only
            RuntimeError: If the LLM call or response handling fails
        """
        if not text or not text.strip():
            raise ValueError("Input text cannot be empty")

        if not entities or len(entities) < 2:
            # Need at least 2 entities to have a relation
            return []

        # Lazy-load LLM manager
        if self._llm_manager is None:
            self._llm_manager = await get_llm_manager()

        # Build extraction prompt
        prompt = self._build_extraction_prompt(text, entities, relation_types)

        try:
            response = await self._llm_manager.generate_text(
                messages=prompt,
                provider=self.provider,
                model=self.model,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )

            # Parse LLM response to Relation objects
            return self._parse_llm_response(response.content, entities)

        except Exception as e:
            # Single failure type for callers; the original cause stays chained
            raise RuntimeError(f"LLM relation extraction failed: {str(e)}") from e

    def _build_extraction_prompt(
        self,
        text: str,
        entities: List[Entity],
        relation_types: Optional[List[str]] = None,
    ) -> str:
        """
        Build prompt for LLM relation extraction

        The prompt includes:
        1. Task description
        2. List of entities to consider
        3. Relation types to extract (from schema)
        4. Output format specification
        5. The text to analyze

        Args:
            text: Input text
            entities: List of known entities
            relation_types: Optional filter for relation types. With a schema,
                only the requested types that the schema knows are kept.

        Returns:
            Formatted prompt string
        """
        # Build entity reference list
        entity_list = [
            f" [{idx}] {entity.entity_type}: {self._get_entity_name(entity)} (ID: {entity.id})"
            for idx, entity in enumerate(entities)
        ]
        entities_section = "\n".join(entity_list)

        # Decide which relation types the LLM is allowed to emit
        if self.schema:
            available_types = self.schema.get_relation_type_names()
            if relation_types:
                types_to_extract = [t for t in relation_types if t in available_types]
            else:
                types_to_extract = available_types
        elif relation_types:
            types_to_extract = relation_types
        else:
            # No schema, use common relation types
            types_to_extract = list(self._DEFAULT_RELATION_TYPES)

        # Build relation type descriptions
        relation_descriptions = []
        for rel_type in types_to_extract:
            if self.schema and self.schema.has_relation_type(rel_type):
                schema_rel = self.schema.get_relation_type(rel_type)
                desc = schema_rel.description or f"'{rel_type}' relation"
                relation_descriptions.append(f"- {rel_type}: {desc}")
            else:
                relation_descriptions.append(f"- {rel_type}: Extract this type of relationship")

        relations_section = "\n".join(relation_descriptions)

        # Build prompt
        prompt = f"""You are an expert at extracting relationships between entities from text.

Given the following entities:
{entities_section}

Extract all relationships between these entities from the text.

Allowed relation types:
{relations_section}

For each relation, provide:
1. source_id: ID of the source entity (from list above)
2. target_id: ID of the target entity (from list above)
3. relation_type: Type of relation (one of the allowed types)
4. properties: Optional dictionary of relation properties (e.g., since="2020", role="engineer")
5. confidence: Your confidence in this extraction (0.0 to 1.0)

Return ONLY a valid JSON array with this structure:
[
{{
"source_id": "entity_id_here",
"target_id": "entity_id_here",
"relation_type": "RELATION_TYPE",
"properties": {{"property1": "value1"}},
"confidence": 0.95
}}
]

Important:
- Only extract relations that are explicitly stated or strongly implied in the text
- Use the exact entity IDs from the list above
- Relations should be directional (source -> target matters)
- If unsure about a property, omit it
- Return empty array [] if no relations found

Text to analyze:
\"\"\"{text}\"\"\"

JSON output:"""

        return prompt

    def _parse_llm_response(self, response_text: str, entities: List[Entity]) -> List[Relation]:
        """
        Parse LLM response to Relation objects

        Expected JSON format:
        [
          {
            "source_id": "e1",
            "target_id": "e2",
            "relation_type": "WORKS_FOR",
            "properties": {"title": "engineer"},
            "confidence": 0.95
          }
        ]

        Items are skipped (not raised on) when they are not JSON objects,
        lack a source/target/relation type, reference an entity ID that is
        not in ``entities``, or describe a self-loop. A missing or non-object
        "properties" value is treated as an empty dict.

        Args:
            response_text: LLM response string
            entities: List of entities for validation

        Returns:
            List of Relation objects; empty when the response is not valid JSON
        """
        relations: List[Relation] = []
        entity_ids = {e.id for e in entities}

        try:
            # Extract the JSON payload from the response and decode it
            extracted_data = json.loads(self._extract_json_from_text(response_text))
        except json.JSONDecodeError as e:
            # Best effort: report the problem and yield no relations.
            # Function-scope import keeps this block self-contained.
            import logging

            log = logging.getLogger(__name__)
            log.warning("Failed to parse LLM response as JSON: %s", e)
            log.warning("Response was: %s...", response_text[:200])
            return []

        if not isinstance(extracted_data, list):
            extracted_data = [extracted_data]

        # Convert to Relation objects
        for item in extracted_data:
            if not isinstance(item, dict):
                # e.g. the LLM answered with a list of strings or numbers
                continue

            source_id = item.get("source_id")
            target_id = item.get("target_id")
            relation_type = item.get("relation_type")

            # Validate required fields
            if not source_id or not target_id:
                continue
            if not relation_type or not isinstance(relation_type, str):
                # relation_type is required and must be a type name
                continue
            if not isinstance(source_id, str) or not isinstance(target_id, str):
                # Non-string IDs (lists, objects, numbers) cannot match a known
                # entity and unhashable ones would break the membership test
                continue
            if source_id not in entity_ids or target_id not in entity_ids:
                # LLM hallucinated entity IDs
                continue
            if source_id == target_id:
                # Self-loop, skip
                continue

            # "properties": null or a non-object value falls back to no properties
            raw_properties = item.get("properties")
            properties = dict(raw_properties) if isinstance(raw_properties, dict) else {}

            # Create Relation with a freshly generated unique ID
            relation = Relation(
                id=str(uuid.uuid4()),
                relation_type=relation_type,
                source_id=source_id,
                target_id=target_id,
                properties=properties,
            )

            # Store confidence alongside the extracted properties
            relation.properties["_extraction_confidence"] = self._coerce_confidence(
                item.get("confidence", self._DEFAULT_CONFIDENCE)
            )

            relations.append(relation)

        return relations

    def _coerce_confidence(self, value: object) -> float:
        """
        Normalize an LLM-supplied confidence value

        Args:
            value: Raw "confidence" value from the decoded JSON item

        Returns:
            The value as a float clamped to [0.0, 1.0], or the default
            confidence when it is not numeric (or is NaN)
        """
        try:
            score = float(value)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return self._DEFAULT_CONFIDENCE
        if score != score:  # NaN is the only value not equal to itself
            return self._DEFAULT_CONFIDENCE
        return min(max(score, 0.0), 1.0)

    def _extract_json_from_text(self, text: str) -> str:
        """
        Extract the JSON payload from an LLM response that may contain extra text

        Two candidate spans are considered: from the first "[" to the last "]"
        (array) and from the first "{" to the last "}" (single object). The
        array span is returned when it is the only candidate or when it decodes
        to a list of JSON objects. If it does not, and the object span decodes
        to a JSON object, the object span is returned instead; this handles a
        single relation whose properties contain a list.

        Args:
            text: Response text that may contain JSON

        Returns:
            Extracted JSON string, or the original text when no candidate
            span is found
        """
        array_span = self._outer_span(text, "[", "]")
        object_span = self._outer_span(text, "{", "}")

        # Only one (or no) candidate: nothing to disambiguate
        if array_span is None:
            return object_span if object_span is not None else text
        if object_span is None:
            return array_span

        # Both candidates exist: keep the array when it really is a list of records
        decoded_array = self._decode_or_none(array_span)
        if isinstance(decoded_array, list) and all(isinstance(i, dict) for i in decoded_array):
            return array_span

        # The brackets belonged to something else; prefer a valid enclosing object
        if isinstance(self._decode_or_none(object_span), dict):
            return object_span

        # Neither candidate validated: keep the historical array-first behaviour
        return array_span

    @staticmethod
    def _outer_span(text: str, opener: str, closer: str) -> Optional[str]:
        """Return text from the first opener to the last closer, or None if absent."""
        start = text.find(opener)
        end = text.rfind(closer) + 1
        return text[start:end] if start != -1 and end > start else None

    @staticmethod
    def _decode_or_none(candidate: str):
        """Return the decoded JSON value, or None when candidate is not valid JSON."""
        try:
            return json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return None

    def _get_entity_name(self, entity: Entity) -> str:
        """
        Extract a display name for an entity from its properties

        Uses the first truthy value among the "name", "title" and "text"
        properties; otherwise falls back to the entity type plus the first
        8 characters of the entity ID.
        """
        return str(
            entity.properties.get("name")
            or entity.properties.get("title")
            or entity.properties.get("text")
            or f"{entity.entity_type}_{entity.id[:8]}"
        )