aiecs-1.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py
@@ -0,0 +1,327 @@
"""
LLM-based Entity Extractor

Extracts entities from text using Large Language Models (GPT-4, Gemini, etc.).
Uses AIECS's LLM client infrastructure for provider-agnostic extraction.
"""

import json
import uuid
from typing import List, Optional, Dict, Any
from aiecs.application.knowledge_graph.extractors.base import EntityExtractor
from aiecs.domain.knowledge_graph.models.entity import Entity
from aiecs.domain.knowledge_graph.schema.graph_schema import GraphSchema
from aiecs.llm import get_llm_manager, AIProvider


class LLMEntityExtractor(EntityExtractor):
    """
    Extract entities using Large Language Models

    This extractor uses LLMs (like GPT-4, Gemini) to identify and extract entities
    from text. It's schema-aware and can extract custom entity types with properties.

    Features:
    - Schema-guided extraction (tells LLM what entity types to look for)
    - Property extraction (not just entity names, but also attributes)
    - Confidence scoring (LLM provides confidence for each entity)
    - Configurable LLM provider (Vertex AI default, configurable)

    Example:
        ```python
        from aiecs.llm import AIProvider

        extractor = LLMEntityExtractor(
            schema=graph_schema,
            provider=AIProvider.VERTEX,
            model="gemini-pro"
        )

        entities = await extractor.extract_entities(
            "Alice, a 30-year-old data scientist, works at Tech Corp."
        )
        # Returns: [
        #   Entity(type="Person", properties={"name": "Alice", "age": 30, "occupation": "data scientist"}),
        #   Entity(type="Company", properties={"name": "Tech Corp"})
        # ]
        ```
    """

    def __init__(
        self,
        schema: Optional[GraphSchema] = None,
        provider: Optional[AIProvider] = None,
        model: Optional[str] = None,
        temperature: float = 0.1,  # Low temperature for more deterministic extraction
        max_tokens: Optional[int] = 2000,
    ):
        """
        Initialize LLM entity extractor

        Args:
            schema: Optional GraphSchema to guide extraction (provides entity types and properties)
            provider: LLM provider to use (default: Vertex AI via AIECS configuration)
            model: Specific model to use (default: from AIECS configuration)
            temperature: LLM temperature (0.1 = more deterministic, good for extraction)
            max_tokens: Maximum tokens in response
        """
        self.schema = schema
        self.provider = provider
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self._llm_manager = None  # Lazy-loaded in async methods

    async def extract_entities(
        self, text: str, entity_types: Optional[List[str]] = None, **kwargs
    ) -> List[Entity]:
        """
        Extract entities from text using LLM

        Args:
            text: Input text to extract entities from
            entity_types: Optional filter for specific entity types
            **kwargs: Additional parameters (e.g., custom prompt, examples)

        Returns:
            List of extracted Entity objects

        Raises:
            ValueError: If text is empty
            RuntimeError: If LLM extraction fails
        """
        if not text or not text.strip():
            raise ValueError("Input text cannot be empty")

        # Lazy-load LLM manager
        if self._llm_manager is None:
            self._llm_manager = await get_llm_manager()

        # Build extraction prompt
        prompt = self._build_extraction_prompt(text, entity_types)

        # Call LLM
        try:
            response = await self._llm_manager.generate_text(
                messages=prompt,
                provider=self.provider,
                model=self.model,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )

            # Parse LLM response to Entity objects
            entities = self._parse_llm_response(response.content)

            return entities

        except Exception as e:
            raise RuntimeError(f"LLM entity extraction failed: {str(e)}") from e

    def _build_extraction_prompt(self, text: str, entity_types: Optional[List[str]] = None) -> str:
        """
        Build prompt for LLM entity extraction

        The prompt is structured to:
        1. Explain the task (entity extraction)
        2. Provide entity type schemas (if available)
        3. Request JSON output format
        4. Include the text to extract from

        Args:
            text: Input text
            entity_types: Optional filter for entity types

        Returns:
            Formatted prompt string
        """
        # Determine which entity types to extract
        types_to_extract = []
        if self.schema:
            available_types = self.schema.get_entity_type_names()
            if entity_types:
                # Filter to requested types that exist in schema
                types_to_extract = [t for t in entity_types if t in available_types]
            else:
                # Use all types from schema
                types_to_extract = available_types
        elif entity_types:
            # No schema, but user specified types
            types_to_extract = entity_types
        else:
            # No schema and no filter - use common types
            types_to_extract = [
                "Person",
                "Organization",
                "Location",
                "Event",
                "Product",
            ]

        # Build entity type descriptions
        type_descriptions = []
        for entity_type in types_to_extract:
            if self.schema and self.schema.has_entity_type(entity_type):
                # Use schema definition
                schema_type = self.schema.get_entity_type(entity_type)
                properties = list(schema_type.properties.keys()) if schema_type.properties else []
                prop_str = ", ".join(properties) if properties else "any relevant properties"
                desc = f"- {entity_type}: {schema_type.description or 'Extract properties: ' + prop_str}"
                type_descriptions.append(desc)
            else:
                # Generic description
                type_descriptions.append(
                    f"- {entity_type}: Extract name and any relevant properties"
                )

        types_description = "\n".join(type_descriptions)

        # Build prompt
        prompt = f"""You are an expert at extracting structured entities from text.

Extract entities of the following types from the text:
{types_description}

For each entity, provide:
1. type: The entity type (one of the types listed above)
2. properties: A dictionary of properties (e.g., name, age, location, etc.)
3. confidence: Your confidence in this extraction (0.0 to 1.0)

Return ONLY a valid JSON array with this structure:
[
  {{
    "type": "EntityType",
    "properties": {{"property1": "value1", "property2": "value2"}},
    "confidence": 0.95
  }}
]

Important:
- Extract ALL entities you find of the specified types
- Include as many relevant properties as you can find
- Use consistent property names (e.g., "name" not "title" or "full_name")
- If unsure about a property, omit it rather than guessing
- Confidence should reflect how certain you are about the entity and its properties

Text to extract from:
\"\"\"{text}\"\"\"

JSON output:"""

        return prompt

    def _parse_llm_response(self, response_text: str) -> List[Entity]:
        """
        Parse LLM response to Entity objects

        Expected JSON format:
        [
            {"type": "Person", "properties": {"name": "Alice", "age": 30}, "confidence": 0.95},
            {"type": "Company", "properties": {"name": "Tech Corp"}, "confidence": 0.90}
        ]

        Args:
            response_text: LLM response string (should be JSON)

        Returns:
            List of Entity objects
        """
        entities = []

        try:
            # Extract JSON from response (LLM might include extra text)
            json_str = self._extract_json_from_text(response_text)

            # Parse JSON
            extracted_data = json.loads(json_str)

            if not isinstance(extracted_data, list):
                # Sometimes LLM returns single object instead of array
                extracted_data = [extracted_data]

            # Convert to Entity objects
            for item in extracted_data:
                entity_type = item.get("type", "Unknown")
                properties = item.get("properties", {})
                confidence = item.get("confidence", 0.5)

                # Generate unique ID
                entity_id = self._generate_entity_id(entity_type, properties)

                # Create Entity
                entity = Entity(
                    id=entity_id,
                    entity_type=entity_type,
                    properties=properties,
                )

                # Store confidence in properties for later use
                entity.properties["_extraction_confidence"] = confidence

                entities.append(entity)

        except json.JSONDecodeError as e:
            # Log error but don't fail completely
            # In production, you might want to retry or use fallback
            print(f"Warning: Failed to parse LLM response as JSON: {e}")
            print(f"Response was: {response_text[:200]}...")
            return []

        return entities

    def _extract_json_from_text(self, text: str) -> str:
        """
        Extract JSON array from text (handles cases where LLM includes extra text)

        Args:
            text: Response text that may contain JSON

        Returns:
            Extracted JSON string
        """
        # Find JSON array boundaries
        start = text.find("[")
        end = text.rfind("]") + 1

        if start != -1 and end > start:
            return text[start:end]

        # Try to find JSON object (single entity)
        start = text.find("{")
        end = text.rfind("}") + 1

        if start != -1 and end > start:
            return text[start:end]

        # No JSON found, return original
        return text

    def _generate_entity_id(self, entity_type: str, properties: Dict[str, Any]) -> str:
        """
        Generate a unique ID for an entity

        Uses entity type + key property (usually "name") to create deterministic ID,
        with fallback to UUID for uniqueness.

        Args:
            entity_type: Entity type name
            properties: Entity properties

        Returns:
            Unique entity ID string
        """
        # Try to use name for deterministic ID
        name = properties.get("name") or properties.get("title") or properties.get("id")

        if name:
            # Create deterministic ID from type + name
            # Normalize to lowercase and remove spaces
            normalized = f"{entity_type}_{name}".lower().replace(" ", "_")
            # Add short hash for uniqueness
            import hashlib

            hash_suffix = hashlib.md5(normalized.encode()).hexdigest()[:8]
            return f"{normalized}_{hash_suffix}"
        else:
            # No name property, use UUID
            return f"{entity_type.lower()}_{uuid.uuid4().hex[:12]}"
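
Note: `extract_entities` (and `extract_relations` in the next hunk) stores the model's self-reported confidence under the reserved `_extraction_confidence` property, defaulting to 0.5 when the LLM omits it. A minimal caller-side sketch of post-filtering on that key; this is illustrative only, not part of the package, and the 0.8 threshold is an arbitrary choice:

```python
# Illustrative sketch: drop entities whose stored extraction confidence falls
# below a threshold. Relies only on Entity.properties as used in the code above.
from typing import List

from aiecs.domain.knowledge_graph.models.entity import Entity


def filter_confident(entities: List[Entity], threshold: float = 0.8) -> List[Entity]:
    return [
        e for e in entities
        if e.properties.get("_extraction_confidence", 0.0) >= threshold
    ]
```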
aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py
@@ -0,0 +1,349 @@
"""
LLM-based Relation Extractor

Extracts relations between entities using Large Language Models.
"""

import json
import uuid
from typing import List, Optional
from aiecs.application.knowledge_graph.extractors.base import RelationExtractor
from aiecs.domain.knowledge_graph.models.entity import Entity
from aiecs.domain.knowledge_graph.models.relation import Relation
from aiecs.domain.knowledge_graph.schema.graph_schema import GraphSchema
from aiecs.llm import get_llm_manager, AIProvider


class LLMRelationExtractor(RelationExtractor):
    """
    Extract relations between entities using LLMs

    Given text and a list of entities, identifies relationships between them.
    Uses LLMs to understand semantic relationships and extract structured relations.

    Features:
    - Schema-aware extraction (uses relation types from schema)
    - Entity-aware (only extracts relations between known entities)
    - Property extraction (relation properties/attributes)
    - Confidence scoring
    - Directional relation support

    Example:
        ```python
        extractor = LLMRelationExtractor(schema=graph_schema)

        alice = Entity(id="e1", type="Person", properties={"name": "Alice"})
        tech_corp = Entity(id="e2", type="Company", properties={"name": "Tech Corp"})

        relations = await extractor.extract_relations(
            text="Alice works as a senior engineer at Tech Corp.",
            entities=[alice, tech_corp]
        )
        # Returns: [
        #   Relation(
        #     source_id="e1",
        #     target_id="e2",
        #     relation_type="WORKS_FOR",
        #     properties={"title": "senior engineer"}
        #   )
        # ]
        ```
    """

    def __init__(
        self,
        schema: Optional[GraphSchema] = None,
        provider: Optional[AIProvider] = None,
        model: Optional[str] = None,
        temperature: float = 0.1,
        max_tokens: Optional[int] = 2000,
    ):
        """
        Initialize LLM relation extractor

        Args:
            schema: Optional GraphSchema to guide extraction
            provider: LLM provider (default: Vertex AI)
            model: Specific model to use
            temperature: LLM temperature (low for deterministic extraction)
            max_tokens: Maximum tokens in response
        """
        self.schema = schema
        self.provider = provider
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self._llm_manager = None  # Lazy-loaded in async methods

    async def extract_relations(
        self,
        text: str,
        entities: List[Entity],
        relation_types: Optional[List[str]] = None,
        **kwargs,
    ) -> List[Relation]:
        """
        Extract relations from text given known entities

        Args:
            text: Input text containing entities
            entities: List of entities already extracted
            relation_types: Optional filter for specific relation types
            **kwargs: Additional parameters

        Returns:
            List of extracted Relation objects

        Raises:
            ValueError: If text or entities are empty
            RuntimeError: If LLM extraction fails
        """
        if not text or not text.strip():
            raise ValueError("Input text cannot be empty")

        if not entities or len(entities) < 2:
            # Need at least 2 entities to have a relation
            return []

        # Lazy-load LLM manager
        if self._llm_manager is None:
            self._llm_manager = await get_llm_manager()

        # Build extraction prompt
        prompt = self._build_extraction_prompt(text, entities, relation_types)

        # Call LLM
        try:
            response = await self._llm_manager.generate_text(
                messages=prompt,
                provider=self.provider,
                model=self.model,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )

            # Parse LLM response to Relation objects
            relations = self._parse_llm_response(response.content, entities)

            return relations

        except Exception as e:
            raise RuntimeError(f"LLM relation extraction failed: {str(e)}") from e

    def _build_extraction_prompt(
        self,
        text: str,
        entities: List[Entity],
        relation_types: Optional[List[str]] = None,
    ) -> str:
        """
        Build prompt for LLM relation extraction

        The prompt includes:
        1. Task description
        2. List of entities to consider
        3. Relation types to extract (from schema)
        4. Output format specification
        5. The text to analyze

        Args:
            text: Input text
            entities: List of known entities
            relation_types: Optional filter for relation types

        Returns:
            Formatted prompt string
        """
        # Build entity reference list
        entity_list = []
        entity_index = {}
        for idx, entity in enumerate(entities):
            entity_name = self._get_entity_name(entity)
            entity_list.append(f"  [{idx}] {entity.entity_type}: {entity_name} (ID: {entity.id})")
            entity_index[entity.id] = idx

        entities_section = "\n".join(entity_list)

        # Build relation type descriptions
        types_to_extract = []
        if self.schema:
            available_types = self.schema.get_relation_type_names()
            if relation_types:
                types_to_extract = [t for t in relation_types if t in available_types]
            else:
                types_to_extract = available_types
        elif relation_types:
            types_to_extract = relation_types
        else:
            # No schema, use common relation types
            types_to_extract = [
                "WORKS_FOR",
                "LOCATED_IN",
                "PART_OF",
                "KNOWS",
                "OWNS",
                "MANAGES",
                "PRODUCES",
                "RELATED_TO",
            ]

        # Build relation type descriptions
        relation_descriptions = []
        for rel_type in types_to_extract:
            if self.schema and self.schema.has_relation_type(rel_type):
                schema_rel = self.schema.get_relation_type(rel_type)
                desc = schema_rel.description or f"'{rel_type}' relation"
                relation_descriptions.append(f"- {rel_type}: {desc}")
            else:
                relation_descriptions.append(f"- {rel_type}: Extract this type of relationship")

        relations_section = "\n".join(relation_descriptions)

        # Build prompt
        prompt = f"""You are an expert at extracting relationships between entities from text.

Given the following entities:
{entities_section}

Extract all relationships between these entities from the text.

Allowed relation types:
{relations_section}

For each relation, provide:
1. source_id: ID of the source entity (from list above)
2. target_id: ID of the target entity (from list above)
3. relation_type: Type of relation (one of the allowed types)
4. properties: Optional dictionary of relation properties (e.g., since="2020", role="engineer")
5. confidence: Your confidence in this extraction (0.0 to 1.0)

Return ONLY a valid JSON array with this structure:
[
  {{
    "source_id": "entity_id_here",
    "target_id": "entity_id_here",
    "relation_type": "RELATION_TYPE",
    "properties": {{"property1": "value1"}},
    "confidence": 0.95
  }}
]

Important:
- Only extract relations that are explicitly stated or strongly implied in the text
- Use the exact entity IDs from the list above
- Relations should be directional (source -> target matters)
- If unsure about a property, omit it
- Return empty array [] if no relations found

Text to analyze:
\"\"\"{text}\"\"\"

JSON output:"""

        return prompt

    def _parse_llm_response(self, response_text: str, entities: List[Entity]) -> List[Relation]:
        """
        Parse LLM response to Relation objects

        Expected JSON format:
        [
            {
                "source_id": "e1",
                "target_id": "e2",
                "relation_type": "WORKS_FOR",
                "properties": {"title": "engineer"},
                "confidence": 0.95
            }
        ]

        Args:
            response_text: LLM response string
            entities: List of entities for validation

        Returns:
            List of Relation objects
        """
        relations = []
        entity_ids = {e.id for e in entities}

        try:
            # Extract JSON from response
            json_str = self._extract_json_from_text(response_text)

            # Parse JSON
            extracted_data = json.loads(json_str)

            if not isinstance(extracted_data, list):
                extracted_data = [extracted_data]

            # Convert to Relation objects
            for item in extracted_data:
                source_id = item.get("source_id")
                target_id = item.get("target_id")
                relation_type = item.get("relation_type")
                properties = item.get("properties", {})
                confidence = item.get("confidence", 0.5)

                # Validate required fields
                if not source_id or not target_id:
                    continue
                if not relation_type:  # relation_type is required and cannot be None
                    continue
                if source_id not in entity_ids or target_id not in entity_ids:
                    # LLM hallucinated entity IDs
                    continue
                if source_id == target_id:
                    # Self-loop, skip
                    continue

                # Generate unique ID
                relation_id = str(uuid.uuid4())

                # Create Relation
                relation = Relation(
                    id=relation_id,
                    relation_type=relation_type,
                    source_id=source_id,
                    target_id=target_id,
                    properties=properties,
                )

                # Store confidence
                relation.properties["_extraction_confidence"] = confidence

                relations.append(relation)

        except json.JSONDecodeError as e:
            print(f"Warning: Failed to parse LLM response as JSON: {e}")
            print(f"Response was: {response_text[:200]}...")
            return []

        return relations

    def _extract_json_from_text(self, text: str) -> str:
        """Extract JSON array from text"""
        # Find JSON array boundaries
        start = text.find("[")
        end = text.rfind("]") + 1

        if start != -1 and end > start:
            return text[start:end]

        # Try single object
        start = text.find("{")
        end = text.rfind("}") + 1

        if start != -1 and end > start:
            return text[start:end]

        return text

    def _get_entity_name(self, entity: Entity) -> str:
        """Extract entity name from properties"""
        return (
            entity.properties.get("name")
            or entity.properties.get("title")
            or entity.properties.get("text")
            or f"{entity.entity_type}_{entity.id[:8]}"
        )