aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knowledge Graph Builder Tool
|
|
3
|
+
|
|
4
|
+
AIECS tool for building knowledge graphs from text and documents.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, Any, List, Optional
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
|
|
10
|
+
from aiecs.tools.base_tool import BaseTool
|
|
11
|
+
from aiecs.tools import register_tool
|
|
12
|
+
from aiecs.infrastructure.graph_storage.in_memory import InMemoryGraphStore
|
|
13
|
+
from aiecs.application.knowledge_graph.extractors.llm_entity_extractor import (
|
|
14
|
+
LLMEntityExtractor,
|
|
15
|
+
)
|
|
16
|
+
from aiecs.application.knowledge_graph.extractors.llm_relation_extractor import (
|
|
17
|
+
LLMRelationExtractor,
|
|
18
|
+
)
|
|
19
|
+
from aiecs.application.knowledge_graph.builder.graph_builder import (
|
|
20
|
+
GraphBuilder,
|
|
21
|
+
)
|
|
22
|
+
from aiecs.application.knowledge_graph.builder.document_builder import (
|
|
23
|
+
DocumentGraphBuilder,
|
|
24
|
+
)
|
|
25
|
+
from aiecs.application.knowledge_graph.builder.structured_pipeline import (
|
|
26
|
+
StructuredDataPipeline,
|
|
27
|
+
)
|
|
28
|
+
from aiecs.application.knowledge_graph.builder.schema_mapping import (
|
|
29
|
+
SchemaMapping,
|
|
30
|
+
EntityMapping,
|
|
31
|
+
RelationMapping,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class KGBuilderInput(BaseModel):
    """Input schema for Knowledge Graph Builder Tool (legacy, for execute() method)"""

    # Required: names the operation the tool should run.
    action: str = Field(
        ...,
        description="Action to perform: 'build_from_text', 'build_from_document', 'build_from_structured_data', 'get_stats'",
    )

    # Every remaining field is optional here; each action handler checks for
    # the parameters it needs and reports a missing one in its result.
    text: Optional[str] = Field(
        default=None,
        description="Text to extract knowledge from (for 'build_from_text' action)",
    )
    document_path: Optional[str] = Field(
        default=None,
        description="Path to document file (for 'build_from_document' action)",
    )
    source: Optional[str] = Field(
        default=None,
        description="Optional source identifier (document name, URL, etc.)",
    )
    entity_types: Optional[List[str]] = Field(
        default=None,
        description="Optional list of entity types to extract (e.g., ['Person', 'Company', 'Location'])",
    )
    data_path: Optional[str] = Field(
        default=None,
        description="Path to structured data file (CSV or JSON) for 'build_from_structured_data' action",
    )
    schema_mapping: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Schema mapping configuration for structured data import (entity_mappings, relation_mappings)",
    )
    relation_types: Optional[List[str]] = Field(
        default=None,
        description="Optional list of relation types to extract (e.g., ['WORKS_FOR', 'LOCATED_IN'])",
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Schemas for individual operations (used with run_async)
|
|
75
|
+
class BuildFromTextSchema(BaseModel):
    """Schema for build_from_text operation"""

    # Only the text itself is mandatory.
    text: str = Field(..., description="Text to extract knowledge from")

    # Defaults to the literal "unknown" when the caller gives no source.
    source: Optional[str] = Field(
        "unknown",
        description="Optional source identifier (document name, URL, etc.)",
    )

    # Optional type filters; None means no filter was requested.
    entity_types: Optional[List[str]] = Field(
        None,
        description="Optional list of entity types to extract (e.g., ['Person', 'Company', 'Location'])",
    )
    relation_types: Optional[List[str]] = Field(
        None,
        description="Optional list of relation types to extract (e.g., ['WORKS_FOR', 'LOCATED_IN'])",
    )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class BuildFromDocumentSchema(BaseModel):
    """Schema for build_from_document operation"""

    # The document location is the only required parameter.
    document_path: str = Field(
        ...,
        description="Path to document file (PDF, DOCX, TXT, etc.)",
    )

    # Optional type filters; None means no filter was requested.
    entity_types: Optional[List[str]] = Field(
        None,
        description="Optional list of entity types to extract",
    )
    relation_types: Optional[List[str]] = Field(
        None,
        description="Optional list of relation types to extract",
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class GetStatsSchema(BaseModel):
    """Schema for get_stats operation"""

    # Intentionally empty: the get_stats operation takes no parameters.
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@register_tool("kg_builder")
|
|
112
|
+
class KnowledgeGraphBuilderTool(BaseTool):
|
|
113
|
+
"""
|
|
114
|
+
Knowledge Graph Builder Tool
|
|
115
|
+
|
|
116
|
+
Allows AI agents to build knowledge graphs from text and documents.
|
|
117
|
+
Extracts entities and relations, stores them in a graph database.
|
|
118
|
+
|
|
119
|
+
Actions:
|
|
120
|
+
- build_from_text: Extract knowledge from text
|
|
121
|
+
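- build_from_document: Extract knowledge from a document (PDF, DOCX, TXT)
- build_from_structured_data: Import entities and relations from a CSV or JSON file using a schema mapping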
|
|
122
|
+
- get_stats: Get statistics about the knowledge graph
|
|
123
|
+
|
|
124
|
+
Example Usage:
|
|
125
|
+
```python
|
|
126
|
+
# Build from text
|
|
127
|
+
result = tool.execute({
|
|
128
|
+
"action": "build_from_text",
|
|
129
|
+
"text": "Alice works at Tech Corp in San Francisco.",
|
|
130
|
+
"source": "conversation_1"
|
|
131
|
+
})
|
|
132
|
+
|
|
133
|
+
# Build from document
|
|
134
|
+
result = tool.execute({
|
|
135
|
+
"action": "build_from_document",
|
|
136
|
+
"document_path": "/path/to/research_paper.pdf"
|
|
137
|
+
})
|
|
138
|
+
|
|
139
|
+
# Get stats
|
|
140
|
+
stats = tool.execute({
|
|
141
|
+
"action": "get_stats"
|
|
142
|
+
})
|
|
143
|
+
```
|
|
144
|
+
"""
|
|
145
|
+
|
|
146
|
+
name: str = "kg_builder"
|
|
147
|
+
description: str = """Build knowledge graphs from text and documents.
|
|
148
|
+
|
|
149
|
+
This tool extracts entities (people, companies, locations, etc.) and relations
|
|
150
|
+
between them from text or documents, and stores them in a knowledge graph.
|
|
151
|
+
|
|
152
|
+
Use this tool when you need to:
|
|
153
|
+
- Extract structured knowledge from unstructured text
|
|
154
|
+
- Build a knowledge base from documents
|
|
155
|
+
- Understand relationships between entities in text
|
|
156
|
+
- Create a queryable graph of information
|
|
157
|
+
"""
|
|
158
|
+
|
|
159
|
+
input_schema: type[BaseModel] = KGBuilderInput
|
|
160
|
+
|
|
161
|
+
def __init__(self):
    """
    Create the tool without building any graph components yet.

    The store and both builders start as ``None``; they are created later
    by ``_initialize()``.
    """
    super().__init__()

    # Placeholders only. _initialize() currently always fills graph_store
    # with an in-memory store; a production setup would make the backend
    # configurable (SQLite, PostgreSQL, etc.).
    self.graph_store = self.graph_builder = self.document_builder = None
    self._initialized = False
|
|
170
|
+
|
|
171
|
+
async def _initialize(self):
|
|
172
|
+
"""Lazy initialization of components"""
|
|
173
|
+
if self._initialized:
|
|
174
|
+
return
|
|
175
|
+
|
|
176
|
+
# Initialize graph store
|
|
177
|
+
self.graph_store = InMemoryGraphStore()
|
|
178
|
+
await self.graph_store.initialize()
|
|
179
|
+
|
|
180
|
+
# Initialize extractors
|
|
181
|
+
entity_extractor = LLMEntityExtractor()
|
|
182
|
+
relation_extractor = LLMRelationExtractor()
|
|
183
|
+
|
|
184
|
+
# Initialize graph builder
|
|
185
|
+
self.graph_builder = GraphBuilder(
|
|
186
|
+
graph_store=self.graph_store,
|
|
187
|
+
entity_extractor=entity_extractor,
|
|
188
|
+
relation_extractor=relation_extractor,
|
|
189
|
+
enable_deduplication=True,
|
|
190
|
+
enable_linking=True,
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
# Initialize document builder
|
|
194
|
+
self.document_builder = DocumentGraphBuilder(
|
|
195
|
+
graph_builder=self.graph_builder,
|
|
196
|
+
chunk_size=2000,
|
|
197
|
+
enable_chunking=True,
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
self._initialized = True
|
|
201
|
+
|
|
202
|
+
async def _execute(self, **kwargs) -> Dict[str, Any]:
|
|
203
|
+
"""
|
|
204
|
+
Execute the knowledge graph builder tool
|
|
205
|
+
|
|
206
|
+
Args:
|
|
207
|
+
**kwargs: Tool input parameters
|
|
208
|
+
|
|
209
|
+
Returns:
|
|
210
|
+
Dictionary with results
|
|
211
|
+
"""
|
|
212
|
+
# Initialize components
|
|
213
|
+
await self._initialize()
|
|
214
|
+
|
|
215
|
+
action = kwargs.get("action")
|
|
216
|
+
|
|
217
|
+
if action == "build_from_text":
|
|
218
|
+
return await self._build_from_text(kwargs)
|
|
219
|
+
elif action == "build_from_document":
|
|
220
|
+
return await self._build_from_document(kwargs)
|
|
221
|
+
elif action == "build_from_structured_data":
|
|
222
|
+
return await self._build_from_structured_data(kwargs)
|
|
223
|
+
elif action == "get_stats":
|
|
224
|
+
return await self._get_stats()
|
|
225
|
+
else:
|
|
226
|
+
return {
|
|
227
|
+
"success": False,
|
|
228
|
+
"error": (
|
|
229
|
+
f"Unknown action: {action}. "
|
|
230
|
+
f"Supported actions: build_from_text, build_from_document, build_from_structured_data, get_stats"
|
|
231
|
+
),
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
async def _build_from_text(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
|
|
235
|
+
"""
|
|
236
|
+
Build knowledge graph from text
|
|
237
|
+
|
|
238
|
+
Args:
|
|
239
|
+
kwargs: Tool input parameters
|
|
240
|
+
|
|
241
|
+
Returns:
|
|
242
|
+
Build result dictionary
|
|
243
|
+
"""
|
|
244
|
+
text = kwargs.get("text")
|
|
245
|
+
if not text:
|
|
246
|
+
return {
|
|
247
|
+
"success": False,
|
|
248
|
+
"error": "Missing required parameter: text",
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
source = kwargs.get("source", "unknown")
|
|
252
|
+
|
|
253
|
+
# Build graph
|
|
254
|
+
result = await self.graph_builder.build_from_text(text=text, source=source)
|
|
255
|
+
|
|
256
|
+
return {
|
|
257
|
+
"success": result.success,
|
|
258
|
+
"entities_added": result.entities_added,
|
|
259
|
+
"relations_added": result.relations_added,
|
|
260
|
+
"entities_linked": result.entities_linked,
|
|
261
|
+
"entities_deduplicated": result.entities_deduplicated,
|
|
262
|
+
"relations_deduplicated": result.relations_deduplicated,
|
|
263
|
+
"duration_seconds": result.duration_seconds,
|
|
264
|
+
"errors": result.errors,
|
|
265
|
+
"warnings": result.warnings,
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
async def _build_from_document(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
|
|
269
|
+
"""
|
|
270
|
+
Build knowledge graph from document
|
|
271
|
+
|
|
272
|
+
Args:
|
|
273
|
+
kwargs: Tool input parameters
|
|
274
|
+
|
|
275
|
+
Returns:
|
|
276
|
+
Build result dictionary
|
|
277
|
+
"""
|
|
278
|
+
document_path = kwargs.get("document_path")
|
|
279
|
+
if not document_path:
|
|
280
|
+
return {
|
|
281
|
+
"success": False,
|
|
282
|
+
"error": "Missing required parameter: document_path",
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
# Build graph from document
|
|
286
|
+
result = await self.document_builder.build_from_document(document_path)
|
|
287
|
+
|
|
288
|
+
return {
|
|
289
|
+
"success": result.success,
|
|
290
|
+
"document_path": result.document_path,
|
|
291
|
+
"document_type": result.document_type,
|
|
292
|
+
"total_chunks": result.total_chunks,
|
|
293
|
+
"chunks_processed": result.chunks_processed,
|
|
294
|
+
"total_entities_added": result.total_entities_added,
|
|
295
|
+
"total_relations_added": result.total_relations_added,
|
|
296
|
+
"errors": result.errors,
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
async def _build_from_structured_data(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
|
|
300
|
+
"""
|
|
301
|
+
Build knowledge graph from structured data (CSV or JSON)
|
|
302
|
+
|
|
303
|
+
Args:
|
|
304
|
+
kwargs: Tool input parameters
|
|
305
|
+
|
|
306
|
+
Returns:
|
|
307
|
+
Build result dictionary
|
|
308
|
+
"""
|
|
309
|
+
data_path = kwargs.get("data_path")
|
|
310
|
+
if not data_path:
|
|
311
|
+
return {
|
|
312
|
+
"success": False,
|
|
313
|
+
"error": "Missing required parameter: data_path",
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
schema_mapping_dict = kwargs.get("schema_mapping")
|
|
317
|
+
if not schema_mapping_dict:
|
|
318
|
+
return {
|
|
319
|
+
"success": False,
|
|
320
|
+
"error": "Missing required parameter: schema_mapping",
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
try:
|
|
324
|
+
# Parse schema mapping
|
|
325
|
+
entity_mappings = []
|
|
326
|
+
for em_dict in schema_mapping_dict.get("entity_mappings", []):
|
|
327
|
+
entity_mappings.append(EntityMapping(**em_dict))
|
|
328
|
+
|
|
329
|
+
relation_mappings = []
|
|
330
|
+
for rm_dict in schema_mapping_dict.get("relation_mappings", []):
|
|
331
|
+
relation_mappings.append(RelationMapping(**rm_dict))
|
|
332
|
+
|
|
333
|
+
schema_mapping = SchemaMapping(
|
|
334
|
+
entity_mappings=entity_mappings,
|
|
335
|
+
relation_mappings=relation_mappings,
|
|
336
|
+
)
|
|
337
|
+
|
|
338
|
+
# Create structured data pipeline
|
|
339
|
+
pipeline = StructuredDataPipeline(
|
|
340
|
+
mapping=schema_mapping,
|
|
341
|
+
graph_store=self.graph_store,
|
|
342
|
+
batch_size=100,
|
|
343
|
+
skip_errors=True,
|
|
344
|
+
)
|
|
345
|
+
|
|
346
|
+
# Import data based on file extension
|
|
347
|
+
if data_path.endswith(".csv"):
|
|
348
|
+
result = await pipeline.import_from_csv(data_path)
|
|
349
|
+
elif data_path.endswith(".json"):
|
|
350
|
+
result = await pipeline.import_from_json(data_path)
|
|
351
|
+
else:
|
|
352
|
+
return {
|
|
353
|
+
"success": False,
|
|
354
|
+
"error": "Unsupported file format. Supported: .csv, .json",
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
return {
|
|
358
|
+
"success": result.success,
|
|
359
|
+
"data_path": data_path,
|
|
360
|
+
"entities_added": result.entities_added,
|
|
361
|
+
"relations_added": result.relations_added,
|
|
362
|
+
"rows_processed": result.rows_processed,
|
|
363
|
+
"rows_failed": result.rows_failed,
|
|
364
|
+
"duration_seconds": result.duration_seconds,
|
|
365
|
+
"errors": result.errors,
|
|
366
|
+
"warnings": result.warnings,
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
except Exception as e:
|
|
370
|
+
return {
|
|
371
|
+
"success": False,
|
|
372
|
+
"error": f"Failed to import structured data: {str(e)}",
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
async def _get_stats(self) -> Dict[str, Any]:
|
|
376
|
+
"""
|
|
377
|
+
Get knowledge graph statistics
|
|
378
|
+
|
|
379
|
+
Returns:
|
|
380
|
+
Statistics dictionary
|
|
381
|
+
"""
|
|
382
|
+
# Handle both async and sync get_stats methods
|
|
383
|
+
if hasattr(self.graph_store.get_stats, "__call__"):
|
|
384
|
+
stats_result = self.graph_store.get_stats()
|
|
385
|
+
# Check if it's a coroutine
|
|
386
|
+
if hasattr(stats_result, "__await__"):
|
|
387
|
+
stats = await stats_result
|
|
388
|
+
else:
|
|
389
|
+
stats = stats_result
|
|
390
|
+
else:
|
|
391
|
+
stats = self.graph_store.get_stats()
|
|
392
|
+
|
|
393
|
+
return {"success": True, "stats": stats}
|
|
394
|
+
|
|
395
|
+
# Public methods for ToolExecutor integration
|
|
396
|
+
async def build_from_text(
    self,
    text: str,
    source: Optional[str] = "unknown",
    entity_types: Optional[List[str]] = None,
    relation_types: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Build knowledge graph from text (public method for ToolExecutor)

    Ensures the components exist, then delegates to ``_build_from_text``
    with the arguments packed into a parameter dictionary.

    Args:
        text: Text to extract knowledge from
        source: Optional source identifier
        entity_types: Optional list of entity types to extract
        relation_types: Optional list of relation types to extract

    Returns:
        Build result dictionary
    """
    await self._initialize()

    request = dict(
        text=text,
        source=source,
        entity_types=entity_types,
        relation_types=relation_types,
    )
    return await self._build_from_text(request)
|
|
424
|
+
|
|
425
|
+
async def build_from_document(
    self,
    document_path: str,
    entity_types: Optional[List[str]] = None,
    relation_types: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Build knowledge graph from document (public method for ToolExecutor)

    Ensures the components exist, then delegates to
    ``_build_from_document`` with the arguments packed into a parameter
    dictionary.

    Args:
        document_path: Path to document file
        entity_types: Optional list of entity types to extract
        relation_types: Optional list of relation types to extract

    Returns:
        Build result dictionary
    """
    await self._initialize()

    request = dict(
        document_path=document_path,
        entity_types=entity_types,
        relation_types=relation_types,
    )
    return await self._build_from_document(request)
|
|
450
|
+
|
|
451
|
+
async def get_stats(self) -> Dict[str, Any]:
    """
    Get knowledge graph statistics (public method for ToolExecutor)

    Ensures the components exist before asking ``_get_stats`` for the
    statistics.

    Returns:
        Statistics dictionary
    """
    await self._initialize()
    stats_payload = await self._get_stats()
    return stats_payload
|
|
460
|
+
|
|
461
|
+
async def execute(self, **kwargs) -> Dict[str, Any]:
    """
    Execute the tool (public interface)

    This is a coroutine and takes its parameters as keyword arguments,
    e.g. ``await tool.execute(action="get_stats")``. Everything is
    forwarded unchanged to ``_execute``.

    Args:
        **kwargs: Tool input parameters

    Returns:
        Dictionary with results
    """
    outcome = await self._execute(**kwargs)
    return outcome
|
|
472
|
+
|
|
473
|
+
async def close(self):
    """
    Clean up resources

    Closes the graph store if the tool was initialized, then drops the
    store and builders and clears ``_initialized``. Calling ``close()``
    again is therefore a no-op, and a later operation builds fresh
    components through ``_initialize()`` instead of reusing a closed store.
    """
    store = self.graph_store
    # Identity check rather than truthiness, so a store object that happens
    # to evaluate as falsy is still closed.
    if store is None or not self._initialized:
        return

    try:
        await store.close()
    finally:
        # Reset even when close() raises, so the tool never keeps pointing
        # at a store that may be only partially closed.
        self.graph_store = None
        self.graph_builder = None
        self.document_builder = None
        self._initialized = False
|