aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Relation Deduplicator
|
|
3
|
+
|
|
4
|
+
Identifies and removes duplicate relations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Tuple, Dict
|
|
8
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RelationDeduplicator:
    """
    Deduplicate relations based on equivalence

    Two relations are considered duplicates if they have:
    - Same source entity (``source_id``)
    - Same target entity (``target_id``)
    - Same relation type (``relation_type``)

    Properties are not compared; they only matter when duplicates are merged.

    This handles cases like:
    - Extracting "Alice WORKS_FOR Tech Corp" multiple times from different sentences
    - Multiple mentions of the same relationship with slight variations

    Example:
        ```python
        deduplicator = RelationDeduplicator()

        relations = [
            Relation(id="r1", source_id="e1", target_id="e2", relation_type="WORKS_FOR"),
            Relation(
                id="r2",
                source_id="e1",
                target_id="e2",
                relation_type="WORKS_FOR",
                properties={"since": "2020"},
            ),
            Relation(id="r3", source_id="e1", target_id="e3", relation_type="KNOWS"),
        ]

        deduplicated = await deduplicator.deduplicate(relations)
        # Returns two relations:
        # - r1 and r2 merged (id "r1"), with properties
        #   {"since": "2020", "_extraction_confidence": 0.5, "_merged_count": 2}
        #   (assuming r1 carries no properties of its own)
        # - r3, returned as-is
        ```
    """

    def __init__(self, merge_properties: bool = True):
        """
        Initialize relation deduplicator

        Args:
            merge_properties: If True, merge properties from duplicate relations
        """
        self.merge_properties = merge_properties

    async def deduplicate(self, relations: List[Relation]) -> List[Relation]:
        """
        Deduplicate a list of relations

        Relations are grouped by ``(source_id, target_id, relation_type)``.
        A group with a single member is returned as the original object;
        larger groups are collapsed into one newly built relation. Output
        order follows the first appearance of each group in the input.

        Args:
            relations: List of relations to deduplicate

        Returns:
            List of unique relations (with merged properties if enabled)
        """
        if not relations:
            return []

        # dict keeps insertion order, so groups come out in first-seen order.
        groups: Dict[Tuple[str, str, str], List[Relation]] = {}
        for relation in relations:
            key = (relation.source_id, relation.target_id, relation.relation_type)
            groups.setdefault(key, []).append(relation)

        return [
            group[0] if len(group) == 1 else self._merge_relations(group)
            for group in groups.values()
        ]

    @staticmethod
    def _is_empty_value(value: object) -> bool:
        """
        Tell whether a property value carries no information

        Only ``None`` and zero-length strings/bytes/containers count as empty.
        ``0``, ``0.0`` and ``False`` are real values and must not be replaced
        during a merge.
        """
        if value is None:
            return True
        if isinstance(value, (str, bytes, list, tuple, dict, set, frozenset)):
            return len(value) == 0
        return False

    def _build_merged_properties(self, relations: List[Relation]) -> Dict[str, object]:
        """
        Build the property dict for a merged group of duplicate relations

        Args:
            relations: Non-empty list of duplicate relations; the first is the base

        Returns:
            A new dict. It starts from a copy of the base relation's properties.
            When ``merge_properties`` is enabled, later relations contribute keys
            that are missing or empty in the result so far. Independently of
            that flag, ``_extraction_confidence`` is set to the highest value
            found (see below) and ``_merged_count`` to the group size.
        """
        base = relations[0]
        merged: Dict[str, object] = dict(base.properties) if base.properties else {}

        if self.merge_properties:
            for relation in relations[1:]:
                if not relation.properties:
                    continue
                for key, value in relation.properties.items():
                    # First value wins unless it is missing or genuinely empty.
                    if key not in merged or self._is_empty_value(merged[key]):
                        merged[key] = value

        # Relations that have properties but no recorded confidence count as 0.5;
        # relations without any properties are ignored here.
        confidences = [
            r.properties.get("_extraction_confidence", 0.5) for r in relations if r.properties
        ]
        if confidences:
            merged["_extraction_confidence"] = max(confidences)

        # Track merge count
        merged["_merged_count"] = len(relations)
        return merged

    def _merge_relations(self, relations: List[Relation]) -> Relation:
        """
        Merge a group of duplicate relations into one

        Strategy:
        - Use first relation as base (id, type, endpoints and ``source``)
        - Merge properties (first non-empty value wins)
        - Keep highest weight
        - Keep highest confidence

        Args:
            relations: List of duplicate relations

        Returns:
            Merged relation; the single input object when the list has one element
        """
        if len(relations) == 1:
            return relations[0]

        base = relations[0]

        return Relation(
            id=base.id,
            relation_type=base.relation_type,
            source_id=base.source_id,
            target_id=base.target_id,
            properties=self._build_merged_properties(relations),
            weight=max(r.weight for r in relations),
            source=base.source,
        )

    def find_duplicates(self, relations: List[Relation]) -> List[Tuple[Relation, Relation]]:
        """
        Find pairs of duplicate relations without merging

        Useful for debugging or manual review. Every unordered pair is compared,
        so the work grows quadratically with the number of relations.

        Args:
            relations: List of relations to check

        Returns:
            List of (relation1, relation2) tuples that are duplicates, where
            relation1 precedes relation2 in the input
        """
        duplicates = []
        for index, first in enumerate(relations):
            for second in relations[index + 1:]:
                if self._are_duplicates(first, second):
                    duplicates.append((first, second))
        return duplicates

    def _are_duplicates(self, r1: Relation, r2: Relation) -> bool:
        """
        Check if two relations are duplicates

        Args:
            r1: First relation
            r2: Second relation

        Returns:
            True if source, target and relation type are all equal
        """
        return (
            r1.source_id == r2.source_id
            and r1.target_id == r2.target_id
            and r1.relation_type == r2.relation_type
        )
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pattern Matching Module
|
|
3
|
+
|
|
4
|
+
Provides graph pattern matching capabilities for custom query execution.
|
|
5
|
+
|
|
6
|
+
Phase: 3.3 - Full Custom Query Execution
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from aiecs.application.knowledge_graph.pattern_matching.pattern_matcher import (
|
|
10
|
+
PatternMatcher,
|
|
11
|
+
PatternMatch,
|
|
12
|
+
)
|
|
13
|
+
from aiecs.application.knowledge_graph.pattern_matching.query_executor import (
|
|
14
|
+
CustomQueryExecutor,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"PatternMatcher",
|
|
19
|
+
"PatternMatch",
|
|
20
|
+
"CustomQueryExecutor",
|
|
21
|
+
]
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pattern Matching Engine
|
|
3
|
+
|
|
4
|
+
Implements graph pattern matching for custom query execution.
|
|
5
|
+
|
|
6
|
+
Phase: 3.3 - Full Custom Query Execution
|
|
7
|
+
Version: 1.0
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import List, Dict, Any, Optional
|
|
11
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
12
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
13
|
+
from aiecs.domain.knowledge_graph.models.path import Path
|
|
14
|
+
from aiecs.domain.knowledge_graph.models.path_pattern import PathPattern
|
|
15
|
+
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class PatternMatch:
    """
    Container for one result produced by pattern matching

    Attributes:
        entities: Entities that took part in the match
        relations: Relations that took part in the match
        bindings: Variable bindings (if pattern uses variables); an empty
            dict when none are supplied
        score: Match score (documented range 0.0-1.0; not validated here)
    """

    def __init__(
        self,
        entities: List[Entity],
        relations: List[Relation],
        bindings: Optional[Dict[str, Any]] = None,
        score: float = 1.0,
    ):
        self.entities = entities
        self.relations = relations
        # Any falsy value (None or an empty mapping) is replaced by a fresh dict.
        self.bindings = bindings if bindings else {}
        self.score = score

    def __repr__(self) -> str:
        # Only the sizes are shown, so large matches stay readable in logs.
        entity_count = len(self.entities)
        relation_count = len(self.relations)
        return (
            f"PatternMatch(entities={entity_count}, "
            f"relations={relation_count}, score={self.score})"
        )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class PatternMatcher:
|
|
46
|
+
"""
|
|
47
|
+
Graph Pattern Matching Engine
|
|
48
|
+
|
|
49
|
+
Executes pattern matching queries against a graph store.
|
|
50
|
+
Supports:
|
|
51
|
+
- Single pattern matching
|
|
52
|
+
- Multiple pattern matching (AND semantics)
|
|
53
|
+
- Optional pattern matching
|
|
54
|
+
- Cycle detection and handling
|
|
55
|
+
- Result projection and aggregation
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
def __init__(self, graph_store: GraphStore):
    """
    Create a matcher bound to one graph store

    Args:
        graph_store: Storage backend that every lookup and traversal
            performed by this matcher is sent to
    """
    self.graph_store = graph_store
|
|
66
|
+
|
|
67
|
+
async def match_pattern(
    self,
    pattern: PathPattern,
    start_entity_id: Optional[str] = None,
    max_matches: int = 100,
) -> List[PatternMatch]:
    """
    Match a single pattern in the graph

    With ``start_entity_id`` the search is anchored: the entity is fetched
    from the graph store and every matching path found from it becomes one
    match. Without it, the candidates returned by
    ``self._get_all_entities(pattern.entity_types)`` are scanned in order and
    each candidate contributes at most one match (its first matching path).

    The unanchored scan keeps going until ``max_matches`` matches have been
    collected or the candidates run out. Candidates that produce no path do
    not count against the limit. This is more expensive than the anchored
    form because each candidate needs its own traversal.

    Args:
        pattern: Pattern to match
        start_entity_id: Optional starting entity ID
        max_matches: Maximum number of matches to return

    Returns:
        List of pattern matches, each with score 1.0. Empty when the start
        entity does not exist or nothing matches.
    """
    if start_entity_id:
        # Start from specific entity
        start_entity = await self.graph_store.get_entity(start_entity_id)
        if not start_entity:
            return []

        paths = await self._find_matching_paths(start_entity, pattern, max_matches)
        return [
            PatternMatch(entities=path.nodes, relations=path.edges, score=1.0)
            for path in paths
        ]

    # No anchor: try each candidate entity as a starting point.
    matches: List[PatternMatch] = []
    candidates = await self._get_all_entities(pattern.entity_types)

    for entity in candidates:
        # Checked before the traversal so a non-positive limit does no work
        # and no traversal is issued once enough matches exist.
        if len(matches) >= max_matches:
            break

        paths = await self._find_matching_paths(entity, pattern, max_matches=1)
        if paths:
            first_path = paths[0]
            matches.append(
                PatternMatch(
                    entities=first_path.nodes,
                    relations=first_path.edges,
                    score=1.0,
                )
            )

    return matches
|
|
118
|
+
|
|
119
|
+
async def match_multiple_patterns(
    self,
    patterns: List[PathPattern],
    start_entity_id: Optional[str] = None,
    max_matches: int = 100,
) -> List[PatternMatch]:
    """
    Match multiple patterns (AND semantics)

    The first pattern is matched on its own. Each of its matches is kept only
    if every remaining pattern can also be matched when anchored at one of
    that match's entities. For each remaining pattern the entities are tried
    in order and the first one that yields a result is used.

    Args:
        patterns: List of patterns to match
        start_entity_id: Optional starting entity ID (applies to the first
            pattern only)
        max_matches: Maximum number of matches to return

    Returns:
        List of pattern matches where all patterns matched. Entities and
        relations of the sub-matches are appended to those of the first
        match without de-duplication; the score is taken from the first
        match. Empty when ``patterns`` is empty.
    """
    if not patterns:
        return []

    head, *rest = patterns
    seeds = await self.match_pattern(head, start_entity_id, max_matches)

    if not rest:
        return seeds

    results = []

    for seed in seeds:
        entities = list(seed.entities)
        relations = list(seed.relations)

        for extra in rest:
            # Anchors are always the entities of the first-pattern match,
            # never the ones added by earlier sub-matches.
            for anchor in seed.entities:
                found = await self.match_pattern(extra, anchor.id, max_matches=1)
                if not found:
                    continue

                for hit in found:
                    entities.extend(hit.entities)
                    relations.extend(hit.relations)
                break
            else:
                # No anchor satisfied this pattern: the seed is rejected.
                break
        else:
            # Every remaining pattern was satisfied.
            results.append(
                PatternMatch(
                    entities=entities,
                    relations=relations,
                    score=seed.score,
                )
            )

    return results[:max_matches]
|
|
185
|
+
|
|
186
|
+
async def match_optional_patterns(
    self,
    required_patterns: List[PathPattern],
    optional_patterns: List[PathPattern],
    start_entity_id: Optional[str] = None,
    max_matches: int = 100,
) -> List[PatternMatch]:
    """
    Match required patterns with optional patterns

    Required patterns are resolved through ``match_multiple_patterns``. Every
    resulting match is then enriched with each optional pattern that can be
    matched from one of its entities. A match that satisfies no optional
    pattern is still returned.

    Args:
        required_patterns: Patterns that must match
        optional_patterns: Patterns that may or may not match
        start_entity_id: Optional starting entity ID
        max_matches: Maximum number of matches to return

    Returns:
        List of pattern matches, one per required match, each carrying the
        required match's score
    """
    base_matches = await self.match_multiple_patterns(
        required_patterns, start_entity_id, max_matches
    )

    if not optional_patterns:
        return base_matches

    enriched = []

    for base in base_matches:
        entities = list(base.entities)
        relations = list(base.relations)

        for optional in optional_patterns:
            # The first entity that yields a result wins; the others are not tried.
            for anchor in base.entities:
                found = await self.match_pattern(optional, anchor.id, max_matches=1)
                if not found:
                    continue

                for hit in found:
                    entities.extend(hit.entities)
                    relations.extend(hit.relations)
                break

        enriched.append(
            PatternMatch(
                entities=entities,
                relations=relations,
                score=base.score,
            )
        )

    return enriched
|
|
242
|
+
|
|
243
|
+
async def _find_matching_paths(
|
|
244
|
+
self,
|
|
245
|
+
start_entity: Entity,
|
|
246
|
+
pattern: PathPattern,
|
|
247
|
+
max_matches: int = 100,
|
|
248
|
+
) -> List[Path]:
|
|
249
|
+
"""
|
|
250
|
+
Find paths matching a pattern starting from an entity
|
|
251
|
+
|
|
252
|
+
Args:
|
|
253
|
+
start_entity: Starting entity
|
|
254
|
+
pattern: Pattern to match
|
|
255
|
+
max_matches: Maximum number of paths to return
|
|
256
|
+
|
|
257
|
+
Returns:
|
|
258
|
+
List of matching paths
|
|
259
|
+
"""
|
|
260
|
+
# Use graph store's traverse method with pattern constraints
|
|
261
|
+
paths = await self.graph_store.traverse(
|
|
262
|
+
start_entity.id,
|
|
263
|
+
relation_type=(pattern.relation_types[0] if pattern.relation_types else None),
|
|
264
|
+
max_depth=pattern.max_depth,
|
|
265
|
+
max_results=max_matches,
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
# Filter paths based on pattern constraints
|
|
269
|
+
matching_paths = []
|
|
270
|
+
|
|
271
|
+
for path in paths:
|
|
272
|
+
if self._path_matches_pattern(path, pattern):
|
|
273
|
+
matching_paths.append(path)
|
|
274
|
+
|
|
275
|
+
return matching_paths
|
|
276
|
+
|
|
277
|
+
def _path_matches_pattern(self, path: Path, pattern: PathPattern) -> bool:
|
|
278
|
+
"""
|
|
279
|
+
Check if a path matches a pattern
|
|
280
|
+
|
|
281
|
+
Args:
|
|
282
|
+
path: Path to check
|
|
283
|
+
pattern: Pattern to match against
|
|
284
|
+
|
|
285
|
+
Returns:
|
|
286
|
+
True if path matches pattern
|
|
287
|
+
"""
|
|
288
|
+
# Check path length
|
|
289
|
+
if len(path.edges) < pattern.min_path_length:
|
|
290
|
+
return False
|
|
291
|
+
|
|
292
|
+
if len(path.edges) > pattern.max_depth:
|
|
293
|
+
return False
|
|
294
|
+
|
|
295
|
+
# Check entity types
|
|
296
|
+
if pattern.entity_types:
|
|
297
|
+
for entity in path.nodes:
|
|
298
|
+
if entity.entity_type not in pattern.entity_types:
|
|
299
|
+
return False
|
|
300
|
+
|
|
301
|
+
# Check relation types
|
|
302
|
+
if pattern.relation_types:
|
|
303
|
+
for relation in path.edges:
|
|
304
|
+
if relation.relation_type not in pattern.relation_types:
|
|
305
|
+
return False
|
|
306
|
+
|
|
307
|
+
# Check required relation sequence
|
|
308
|
+
if pattern.required_relation_sequence:
|
|
309
|
+
if len(path.edges) != len(pattern.required_relation_sequence):
|
|
310
|
+
return False
|
|
311
|
+
|
|
312
|
+
for i, relation in enumerate(path.edges):
|
|
313
|
+
if relation.relation_type != pattern.required_relation_sequence[i]:
|
|
314
|
+
return False
|
|
315
|
+
|
|
316
|
+
# Check cycles
|
|
317
|
+
if not pattern.allow_cycles:
|
|
318
|
+
entity_ids = [entity.id for entity in path.nodes]
|
|
319
|
+
if len(entity_ids) != len(set(entity_ids)):
|
|
320
|
+
return False
|
|
321
|
+
|
|
322
|
+
# Check excluded entities
|
|
323
|
+
if pattern.excluded_entity_ids:
|
|
324
|
+
for entity in path.nodes:
|
|
325
|
+
if entity.id in pattern.excluded_entity_ids:
|
|
326
|
+
return False
|
|
327
|
+
|
|
328
|
+
return True
|
|
329
|
+
|
|
330
|
+
async def _get_all_entities(self, entity_types: Optional[List[str]] = None) -> List[Entity]:
|
|
331
|
+
"""
|
|
332
|
+
Get all entities, optionally filtered by type
|
|
333
|
+
|
|
334
|
+
Args:
|
|
335
|
+
entity_types: Optional list of entity types to filter by
|
|
336
|
+
|
|
337
|
+
Returns:
|
|
338
|
+
List of entities
|
|
339
|
+
"""
|
|
340
|
+
# This is a placeholder - actual implementation depends on graph store
|
|
341
|
+
# For now, we'll return empty list and rely on start_entity_id
|
|
342
|
+
# In a real implementation, this would query the graph store for all
|
|
343
|
+
# entities
|
|
344
|
+
return []
|