aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +435 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3949 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1731 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +894 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +377 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +230 -37
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +328 -0
- aiecs/llm/clients/google_function_calling_mixin.py +415 -0
- aiecs/llm/clients/googleai_client.py +314 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +1186 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1464 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1016 -0
- aiecs/tools/docs/document_writer_tool.py +2008 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +220 -141
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
- aiecs-1.7.17.dist-info/RECORD +337 -0
- aiecs-1.7.17.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Alias-Based Matching
|
|
3
|
+
|
|
4
|
+
Provides O(1) alias lookup for entity matching using the AliasIndex.
|
|
5
|
+
Supports alias propagation during entity merge operations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Dict, List, Optional, Set, Tuple
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
|
|
12
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
13
|
+
from aiecs.application.knowledge_graph.fusion.alias_index import (
|
|
14
|
+
AliasIndex,
|
|
15
|
+
AliasEntry,
|
|
16
|
+
MatchType,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class AliasMatchResult:
    """Outcome of a successful alias lookup.

    Instances are produced by ``AliasMatcher.lookup`` when the alias index
    returns an entry for the queried name.
    """

    # ID of the entity the alias resolves to (taken from the index entry).
    entity_id: str
    # Lower-cased form of the name that was looked up.
    matched_alias: str
    # Match type recorded on the index entry.
    match_type: MatchType
    # Confidence score assigned to the match by the matcher.
    confidence: float
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class AliasMatcher:
    """
    Alias-based entity matching backed by an AliasIndex.

    Every alias is lower-cased before it is written to the index, and lookups
    are delegated to ``AliasIndex.lookup``. The matcher also supports moving
    all aliases of one entity onto another during entity merge operations.

    Entity `_known_aliases` Property:
        Entities can define known aliases in their properties:
        ```python
        entity = Entity(
            id="person_123",
            entity_type="Person",
            properties={
                "name": "Albert Einstein",
                "_known_aliases": ["A. Einstein", "Einstein", "Albert"]
            }
        )
        ```

    Example:
        ```python
        matcher = AliasMatcher()

        # Initialize index from entities
        await matcher.build_index(entities)

        # Lookup through the index
        match = await matcher.lookup("A. Einstein")
        if match:
            print(f"Found: {match.entity_id}")

        # Alias propagation on merge
        await matcher.propagate_aliases(
            source_entity_id="person_456",
            target_entity_id="person_123"
        )
        ```
    """

    def __init__(self, alias_index: Optional[AliasIndex] = None):
        """
        Initialize AliasMatcher.

        Args:
            alias_index: Optional AliasIndex instance (creates new one if not provided)
        """
        # Compare against None explicitly: a supplied index that happens to be
        # falsy (e.g. it defines __len__ and is empty) must not be replaced.
        self._index = alias_index if alias_index is not None else AliasIndex()

    @property
    def alias_index(self) -> AliasIndex:
        """Get the underlying AliasIndex"""
        return self._index

    async def build_index(self, entities: List[Entity]) -> int:
        """
        Build alias index from a list of entities.

        Each entity is indexed through ``add_entity``, which extracts aliases from:
        1. Entity name (properties["name"], falling back to properties["title"])
        2. Known aliases (properties["_known_aliases"])
        3. Historical aliases (properties["_aliases"])

        Args:
            entities: List of entities to index

        Returns:
            Number of aliases indexed
        """
        total_count = 0
        for entity in entities:
            total_count += await self.add_entity(entity)

        logger.info(
            "Built alias index with %d aliases from %d entities",
            total_count,
            len(entities),
        )
        return total_count

    def _extract_aliases(self, entity: Entity) -> Set[str]:
        """
        Extract all aliases from an entity.

        Sources:
        1. Entity name ("name", or "title" when "name" is missing/empty)
        2. _known_aliases property
        3. _aliases property (from previous merges)

        Alias properties that are not lists are ignored. Only non-empty
        strings are returned, because callers lower-case every alias before
        indexing it.

        Args:
            entity: Entity to extract aliases from

        Returns:
            Set of alias strings
        """
        props = entity.properties
        candidates: List[object] = [props.get("name") or props.get("title") or ""]

        for key in ("_known_aliases", "_aliases"):
            values = props.get(key, [])
            if isinstance(values, list):
                candidates.extend(values)

        # The isinstance test runs before the value is hashed, so stray
        # unhashable or non-string entries are skipped instead of raising.
        return {value for value in candidates if isinstance(value, str) and value}

    async def lookup(self, name: str) -> Optional[AliasMatchResult]:
        """
        Look up an entity by alias.

        Delegates to ``AliasIndex.lookup``.

        Args:
            name: Name or alias to look up

        Returns:
            AliasMatchResult if found, None otherwise
        """
        entry = await self._index.lookup(name)
        if not entry:
            return None

        return AliasMatchResult(
            entity_id=entry.entity_id,
            # NOTE(review): reports the lower-cased query, which presumably
            # equals the key the index matched on -- confirm against AliasIndex.
            matched_alias=name.lower(),
            match_type=entry.match_type,
            confidence=0.98,  # Default confidence for alias match
        )

    async def add_entity(self, entity: Entity) -> int:
        """
        Add entity aliases to the index.

        Aliases are lower-cased first; aliases that differ only by case
        collapse to a single index key and are counted once.

        Args:
            entity: Entity to add

        Returns:
            Number of aliases added
        """
        normalized = {alias.lower() for alias in self._extract_aliases(entity)}

        for key in normalized:
            await self._index.add_alias(
                alias=key,
                entity_id=entity.id,
                match_type=MatchType.ALIAS,
            )

        return len(normalized)

    async def remove_entity(self, entity_id: str) -> int:
        """
        Remove all aliases for an entity from the index.

        Args:
            entity_id: Entity ID to remove

        Returns:
            Number of aliases removed
        """
        return await self._index.remove_entity_aliases(entity_id)

    async def propagate_aliases(
        self,
        source_entity_id: str,
        target_entity_id: str,
    ) -> int:
        """
        Propagate aliases from source entity to target entity during merge.

        Used when merging duplicate entities:
        1. Get all aliases pointing to source entity
        2. Update them to point to target entity

        Both steps run inside the index's transaction context.

        Args:
            source_entity_id: Entity being merged (will be deleted)
            target_entity_id: Entity receiving the merge

        Returns:
            Number of aliases propagated
        """
        # Get all aliases for source entity (list of alias strings)
        source_aliases = await self._index.get_entity_aliases(source_entity_id)

        if not source_aliases:
            return 0

        async with self._index.transaction() as tx:
            # Remove aliases from source using transaction context
            for alias in source_aliases:
                await tx.delete(alias)

            # Add aliases to target using transaction context
            # NOTE(review): every moved alias is stored with MatchType.ALIAS,
            # whatever match type the source entry had -- confirm intended.
            for alias in source_aliases:
                entry = AliasEntry(
                    entity_id=target_entity_id,
                    match_type=MatchType.ALIAS,
                )
                await tx.set(alias, entry)

        logger.info(
            "Propagated %s aliases from %s to %s",
            len(source_aliases),
            source_entity_id,
            target_entity_id,
        )
        return len(source_aliases)

    async def find_matching_entity(
        self,
        candidate_names: List[str],
    ) -> Optional[AliasMatchResult]:
        """
        Find an entity matching any of the candidate names.

        Tries each candidate name in order and returns the first match.

        Args:
            candidate_names: List of names to try

        Returns:
            AliasMatchResult if any name matches, None otherwise
        """
        for name in candidate_names:
            match = await self.lookup(name)
            if match:
                return match
        return None

    async def get_entity_aliases(self, entity_id: str) -> List[str]:
        """
        Get all aliases for an entity.

        Args:
            entity_id: Entity ID

        Returns:
            List of alias strings
        """
        # The index result is returned unchanged.
        return await self._index.get_entity_aliases(entity_id)

    async def size(self) -> int:
        """Get number of aliases in the index"""
        return await self._index.size()
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def get_known_aliases(entity: Entity) -> List[str]:
    """
    Get known aliases from an entity's properties.

    Helper function to access the _known_aliases property. A stored value
    that is not a list (for example None or a bare string) is treated as
    "no aliases", matching the list check the other alias helpers in this
    module apply.

    Args:
        entity: Entity to get aliases from

    Returns:
        The stored list of known aliases (the same list object, not a copy),
        or an empty list if none is stored or the stored value is not a list
    """
    stored = entity.properties.get("_known_aliases", [])
    if isinstance(stored, list):
        return stored
    return []
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def set_known_aliases(entity: Entity, aliases: List[str]) -> None:
    """
    Store a list of known aliases on an entity.

    Overwrites the ``_known_aliases`` entry of the entity's properties with
    the given list; the list object is stored as-is, not copied.

    Args:
        entity: Entity whose properties are updated
        aliases: Alias list to store
    """
    props = entity.properties
    props["_known_aliases"] = aliases
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def add_known_alias(entity: Entity, alias: str) -> None:
    """
    Add a known alias to an entity.

    The alias is appended to the entity's ``_known_aliases`` list unless an
    identical (case-sensitive) entry is already present. An empty alias is
    ignored. If the stored ``_known_aliases`` value is missing or is not a
    list, a fresh list is started instead of failing.

    Args:
        entity: Entity to update
        alias: Alias to add
    """
    if not alias:
        # An empty alias carries no information; merge_aliases skips them too.
        return

    aliases = entity.properties.get("_known_aliases", [])
    if not isinstance(aliases, list):
        # A malformed stored value (None, bare string, ...) is replaced.
        aliases = []

    if alias not in aliases:
        aliases.append(alias)

    # Written back even when nothing was appended, so the key always exists.
    entity.properties["_known_aliases"] = aliases
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def merge_aliases(target: Entity, source: Entity) -> List[str]:
    """
    Merge aliases from source entity into target entity.

    Used during entity merge operations. Combines:
    - Source entity name
    - Source _known_aliases
    - Source _aliases

    Into target's _known_aliases (avoiding duplicates).

    Duplicate detection is case-insensitive and also excludes the target's
    own name. The target's existing aliases keep their order and new aliases
    are appended in the order they are encountered, so the stored list is
    deterministic. Alias properties that are not lists, and alias values
    that are empty or not strings, are ignored.

    Args:
        target: Entity receiving aliases
        source: Entity providing aliases

    Returns:
        List of newly added aliases
    """
    # Existing target aliases: exact duplicates removed, first-seen order kept.
    stored = target.properties.get("_known_aliases", [])
    if not isinstance(stored, list):
        stored = []
    merged: List[str] = list(
        dict.fromkeys(alias for alias in stored if isinstance(alias, str))
    )

    # Lower-cased view used for the case-insensitive duplicate check.
    seen_lower = {alias.lower() for alias in merged}
    raw_target_name = target.properties.get("name", "")
    target_name = raw_target_name.lower() if isinstance(raw_target_name, str) else ""

    # Collect source aliases, discarding malformed (non-list) alias properties.
    source_known = source.properties.get("_known_aliases", [])
    if not isinstance(source_known, list):
        source_known = []
    source_historical = source.properties.get("_aliases", [])
    if not isinstance(source_historical, list):
        source_historical = []
    candidates = [source.properties.get("name", "")] + source_known + source_historical

    new_aliases: List[str] = []
    for alias in candidates:
        if not isinstance(alias, str) or not alias:
            continue
        alias_lower = alias.lower()
        if alias_lower == target_name or alias_lower in seen_lower:
            continue
        seen_lower.add(alias_lower)
        merged.append(alias)
        new_aliases.append(alias)

    # Always write back so the target ends up with a well-formed list.
    target.properties["_known_aliases"] = merged

    return new_aliases
|
|
384
|
+
|