aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Graph Builder - Main Pipeline Orchestrator
|
|
3
|
+
|
|
4
|
+
Orchestrates the full document-to-graph conversion pipeline.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
from typing import List, Optional, Dict, Any, Callable
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
|
|
12
|
+
from aiecs.domain.knowledge_graph.schema.graph_schema import GraphSchema
|
|
13
|
+
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
14
|
+
from aiecs.application.knowledge_graph.extractors.base import (
|
|
15
|
+
EntityExtractor,
|
|
16
|
+
RelationExtractor,
|
|
17
|
+
)
|
|
18
|
+
from aiecs.application.knowledge_graph.fusion.entity_deduplicator import (
|
|
19
|
+
EntityDeduplicator,
|
|
20
|
+
)
|
|
21
|
+
from aiecs.application.knowledge_graph.fusion.entity_linker import EntityLinker
|
|
22
|
+
from aiecs.application.knowledge_graph.fusion.relation_deduplicator import (
|
|
23
|
+
RelationDeduplicator,
|
|
24
|
+
)
|
|
25
|
+
from aiecs.application.knowledge_graph.validators.relation_validator import (
|
|
26
|
+
RelationValidator,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class BuildResult:
    """
    Outcome of a single graph-building run.

    Every field has a default, so ``BuildResult()`` describes a successful
    run that has not recorded any work yet. The list and dict fields use
    ``default_factory`` so each instance owns its own containers.

    Attributes:
        success: False once the build has failed
        entities_added: Count of entities written to the graph
        relations_added: Count of relations written to the graph
        entities_linked: Count of entities matched to existing graph entities
        entities_deduplicated: Count of entities removed as duplicates
        relations_deduplicated: Count of relations removed as duplicates
        errors: Error messages collected during the build
        warnings: Warning messages collected during the build
        metadata: Free-form extra information about the build
        start_time: Timestamp taken when the build began
        end_time: Timestamp taken when the build finished
        duration_seconds: Elapsed time between start and end, in seconds
    """

    # --- overall status ---
    success: bool = True

    # --- counters ---
    entities_added: int = 0
    relations_added: int = 0
    entities_linked: int = 0
    entities_deduplicated: int = 0
    relations_deduplicated: int = 0

    # --- diagnostics (fresh container per instance) ---
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    # --- timing ---
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    duration_seconds: float = 0.0
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class GraphBuilder:
    """
    Main pipeline for building knowledge graphs from text

    The pipeline:
    1. Extract entities from text
    2. Deduplicate entities
    3. Link entities to existing graph
    4. Extract relations between entities
    5. Validate relations
    6. Deduplicate relations
    7. Store entities and relations in graph

    Features:
    - Async/parallel processing
    - Progress callbacks
    - Error handling and recovery
    - Provenance tracking
    - Configurable components

    Example:
        ```python
        # Initialize components
        entity_extractor = LLMEntityExtractor(schema)
        relation_extractor = LLMRelationExtractor(schema)

        # Create builder
        builder = GraphBuilder(
            graph_store=store,
            entity_extractor=entity_extractor,
            relation_extractor=relation_extractor,
            schema=schema
        )

        # Build graph from text
        result = await builder.build_from_text(
            text="Alice works at Tech Corp.",
            source="document_1.pdf"
        )

        print(f"Added {result.entities_added} entities, {result.relations_added} relations")
        ```
    """

    def __init__(
        self,
        graph_store: GraphStore,
        entity_extractor: EntityExtractor,
        relation_extractor: RelationExtractor,
        schema: Optional[GraphSchema] = None,
        enable_deduplication: bool = True,
        enable_linking: bool = True,
        enable_validation: bool = True,
        progress_callback: Optional[Callable[[str, float], None]] = None,
    ):
        """
        Initialize graph builder

        Args:
            graph_store: Graph storage to save entities/relations
            entity_extractor: Entity extractor to use
            relation_extractor: Relation extractor to use
            schema: Optional schema for validation
            enable_deduplication: Enable entity/relation deduplication
            enable_linking: Enable linking to existing entities
            enable_validation: Enable relation validation
            progress_callback: Optional callback for progress updates (message, progress_pct)
        """
        self.graph_store = graph_store
        self.entity_extractor = entity_extractor
        self.relation_extractor = relation_extractor
        self.schema = schema
        self.enable_deduplication = enable_deduplication
        self.enable_linking = enable_linking
        self.enable_validation = enable_validation
        self.progress_callback = progress_callback

        # Initialize fusion components; a disabled stage gets None so the
        # pipeline can skip it with a simple truthiness check.
        self.entity_deduplicator = EntityDeduplicator() if enable_deduplication else None
        self.entity_linker = EntityLinker(graph_store) if enable_linking else None
        self.relation_deduplicator = RelationDeduplicator() if enable_deduplication else None
        # Validation needs a schema: without one the validator is not created
        # even when enable_validation is True.
        self.relation_validator = (
            RelationValidator(schema) if enable_validation and schema else None
        )

    async def build_from_text(
        self,
        text: str,
        source: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> BuildResult:
        """
        Build knowledge graph from text

        Runs the seven pipeline steps in order. Any exception raised inside
        the pipeline is caught, recorded in ``result.errors`` and reported by
        setting ``result.success`` to False; it is not re-raised.

        Args:
            text: Input text to process
            source: Optional source identifier (document name, URL, etc.)
            metadata: Optional metadata to attach to entities/relations

        Returns:
            BuildResult with statistics and errors
        """
        result = BuildResult(start_time=datetime.now())

        try:
            self._report_progress("Starting entity extraction", 0.1)

            # Step 1: Extract entities
            entities = await self.entity_extractor.extract_entities(text)

            if not entities:
                # Nothing to build from: finish early with a warning.
                result.warnings.append("No entities extracted from text")
                return self._finalize_result(result)

            self._report_progress(f"Extracted {len(entities)} entities", 0.2)

            # Step 2: Deduplicate entities (within this text)
            if self.enable_deduplication and self.entity_deduplicator:
                original_count = len(entities)
                entities = await self.entity_deduplicator.deduplicate(entities)
                result.entities_deduplicated = original_count - len(entities)
                self._report_progress(f"Deduplicated to {len(entities)} entities", 0.3)

            # Step 3: Link entities to existing graph
            linked_entities = []
            new_entities = []

            if self.enable_linking and self.entity_linker:
                self._report_progress("Linking entities to graph", 0.4)
                link_results = await self.entity_linker.link_entities(entities)

                for link_result in link_results:
                    if link_result.linked:
                        linked_entities.append(link_result.existing_entity)
                        result.entities_linked += 1
                    else:
                        new_entities.append(link_result.new_entity)
            else:
                new_entities = entities

            # Combine linked and new entities for relation extraction
            all_entities = linked_entities + new_entities

            # Step 4: Extract relations
            if len(all_entities) >= 2:
                self._report_progress(
                    f"Extracting relations from {len(all_entities)} entities",
                    0.5,
                )
                relations = await self.relation_extractor.extract_relations(text, all_entities)
                self._report_progress(f"Extracted {len(relations)} relations", 0.6)
            else:
                relations = []
                result.warnings.append("Not enough entities for relation extraction")

            # Step 5: Validate relations
            valid_relations = relations
            if self.enable_validation and self.relation_validator and relations:
                self._report_progress("Validating relations", 0.7)
                valid_relations = self.relation_validator.filter_valid_relations(
                    relations, all_entities
                )
                invalid_count = len(relations) - len(valid_relations)
                if invalid_count > 0:
                    result.warnings.append(f"{invalid_count} relations failed validation")

            # Step 6: Deduplicate relations
            if self.enable_deduplication and self.relation_deduplicator and valid_relations:
                original_count = len(valid_relations)
                valid_relations = await self.relation_deduplicator.deduplicate(valid_relations)
                result.relations_deduplicated = original_count - len(valid_relations)
                self._report_progress(f"Deduplicated to {len(valid_relations)} relations", 0.8)

            # Step 7: Store in graph
            self._report_progress("Storing entities and relations in graph", 0.9)

            # Add provenance metadata. Keys in `metadata` override "source"
            # because they are merged in after it.
            if source or metadata:
                provenance = {"source": source} if source else {}
                if metadata:
                    provenance.update(metadata)

                # Only new entities are tagged; linked entities are not
                # written by this method.
                self._attach_provenance(new_entities, provenance)
                self._attach_provenance(valid_relations, provenance)

            # Store entities
            for entity in new_entities:
                await self.graph_store.add_entity(entity)
                result.entities_added += 1

            # Store relations
            for relation in valid_relations:
                await self.graph_store.add_relation(relation)
                result.relations_added += 1

            self._report_progress("Build complete", 1.0)

        except Exception as e:
            # Pipeline boundary: report the failure through the result
            # instead of propagating it to the caller.
            result.success = False
            result.errors.append(f"Build failed: {str(e)}")

        return self._finalize_result(result)

    @staticmethod
    def _attach_provenance(items, provenance: Dict[str, Any]) -> None:
        """
        Store a provenance record under ``properties["_provenance"]``

        Each item receives its own shallow copy of ``provenance`` so that a
        later change to one item's record does not leak into the others.

        Args:
            items: Entities or relations exposing a ``properties`` attribute
            provenance: Provenance mapping to copy onto every item
        """
        for item in items:
            if not item.properties:
                item.properties = {}
            item.properties["_provenance"] = dict(provenance)

    async def build_batch(
        self,
        texts: List[str],
        sources: Optional[List[str]] = None,
        parallel: bool = True,
        max_parallel: int = 5,
    ) -> List[BuildResult]:
        """
        Build graph from multiple texts in batch

        Args:
            texts: List of texts to process
            sources: Optional list of source identifiers (same length as texts)
            parallel: Process in parallel (default: True)
            max_parallel: Maximum parallel tasks (default: 5); only used when
                parallel is True and must then be at least 1

        Returns:
            List of BuildResult objects (one per text)

        Raises:
            ValueError: If sources and texts differ in length, or if
                max_parallel is below 1 in parallel mode
        """
        if sources and len(sources) != len(texts):
            raise ValueError("sources list must match texts list length")

        if not sources:
            sources = [f"text_{i}" for i in range(len(texts))]

        if not parallel:
            # Process sequentially
            results = []
            for text, source in zip(texts, sources):
                result = await self.build_from_text(text, source)
                results.append(result)
            return results

        # A semaphore created with 0 permits can never be acquired, so every
        # task would wait forever; reject that up front.
        if max_parallel < 1:
            raise ValueError("max_parallel must be at least 1")

        # Process in parallel with semaphore for concurrency control
        semaphore = asyncio.Semaphore(max_parallel)

        async def process_one(text, source):
            async with semaphore:
                return await self.build_from_text(text, source)

        tasks = [process_one(text, source) for text, source in zip(texts, sources)]
        results = list(await asyncio.gather(*tasks, return_exceptions=True))

        # Handle exceptions: turn any exception returned by gather into a
        # failed BuildResult so the list stays uniform.
        for i, result in enumerate(results):
            if isinstance(result, Exception):
                error_result = BuildResult(success=False)
                error_result.errors.append(str(result))
                results[i] = error_result

        return results

    def _report_progress(self, message: str, progress: float):
        """
        Report progress via callback

        Does nothing when no callback is configured. Exceptions raised by the
        callback are logged as warnings and suppressed.

        Args:
            message: Progress message
            progress: Progress percentage (0.0-1.0)
        """
        import logging

        if self.progress_callback:
            try:
                self.progress_callback(message, progress)
            except Exception as e:
                # Don't let callback errors break the pipeline
                logging.getLogger(__name__).warning("Progress callback error: %s", e)

    def _finalize_result(self, result: BuildResult) -> BuildResult:
        """
        Finalize build result with timing information

        Sets ``end_time`` to the current time and, when ``start_time`` is
        set, computes ``duration_seconds`` from the two.

        Args:
            result: BuildResult to finalize

        Returns:
            Finalized BuildResult (the same object that was passed in)
        """
        result.end_time = datetime.now()
        if result.start_time:
            result.duration_seconds = (result.end_time - result.start_time).total_seconds()
        return result
|