aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,534 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Enhanced Similarity Pipeline for Knowledge Graph Entity Matching.
|
|
3
|
+
|
|
4
|
+
Orchestrates multiple matching strategies in a configurable pipeline:
|
|
5
|
+
1. Exact match (normalized)
|
|
6
|
+
2. Alias match (via AliasIndex)
|
|
7
|
+
3. Abbreviation match (via AbbreviationExpander)
|
|
8
|
+
4. Normalized name match (via NameNormalizer)
|
|
9
|
+
5. Semantic embedding match (via SemanticNameMatcher)
|
|
10
|
+
6. String similarity (fallback)
|
|
11
|
+
|
|
12
|
+
Supports per-entity-type stage filtering and early-exit optimization.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from difflib import SequenceMatcher
|
|
18
|
+
from enum import Enum
|
|
19
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
20
|
+
|
|
21
|
+
from aiecs.application.knowledge_graph.fusion.matching_config import (
|
|
22
|
+
EntityTypeConfig,
|
|
23
|
+
FusionMatchingConfig,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class MatchStage(str, Enum):
    """
    Identifiers for the individual stages of the similarity pipeline.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value (e.g. ``MatchStage.EXACT == "exact"``).
    """

    # Case-insensitive, whitespace-collapsed equality
    EXACT = "exact"
    # Alias lookup through the alias matcher
    ALIAS = "alias"
    # Abbreviation / acronym expansion
    ABBREVIATION = "abbreviation"
    # Comparison of normalized names
    NORMALIZED = "normalized"
    # Embedding-based comparison
    SEMANTIC = "semantic"
    # Plain string similarity
    STRING = "string"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class MatchResult:
    """
    Outcome of running one matching stage on a pair of names.

    Attributes:
        score: Similarity score reported by the stage (0.0 to 1.0)
        stage: The stage that produced this result
        is_match: True when the stage itself considers the pair a match
        details: Free-form, stage-specific information about the match
    """

    score: float
    stage: MatchStage
    is_match: bool = False
    # default_factory gives every instance its own dict
    details: Dict[str, Any] = field(default_factory=dict)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass
class PipelineResult:
    """
    Aggregated outcome of a full similarity pipeline run.

    Attributes:
        final_score: Final similarity score after pipeline execution
        is_match: Whether the two entities are considered a match
        matched_stage: Stage credited with the match (if any)
        stage_results: One MatchResult per stage that was executed
        early_exit: True when the pipeline stopped early on a
            high-confidence score
    """

    final_score: float
    is_match: bool
    matched_stage: Optional[MatchStage] = None
    # default_factory gives every instance its own list
    stage_results: List[MatchResult] = field(default_factory=list)
    early_exit: bool = False
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class SimilarityPipeline:
|
|
76
|
+
"""
|
|
77
|
+
Orchestrates multiple matching strategies for entity similarity.
|
|
78
|
+
|
|
79
|
+
The pipeline executes matching stages in order, supporting:
|
|
80
|
+
- Per-entity-type stage filtering
|
|
81
|
+
- Early exit on high-confidence matches
|
|
82
|
+
- Configurable thresholds for each stage
|
|
83
|
+
|
|
84
|
+
Example:
|
|
85
|
+
```python
|
|
86
|
+
config = FusionMatchingConfig(
|
|
87
|
+
alias_match_score=0.98,
|
|
88
|
+
entity_type_configs={
|
|
89
|
+
"Person": EntityTypeConfig(
|
|
90
|
+
enabled_stages=["exact", "alias", "normalized"],
|
|
91
|
+
semantic_enabled=False
|
|
92
|
+
)
|
|
93
|
+
}
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
pipeline = SimilarityPipeline(
|
|
97
|
+
config=config,
|
|
98
|
+
alias_matcher=alias_matcher,
|
|
99
|
+
abbreviation_expander=expander,
|
|
100
|
+
name_normalizer=normalizer,
|
|
101
|
+
semantic_matcher=semantic_matcher,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
result = await pipeline.compute_similarity(
|
|
105
|
+
name1="Dr. John Smith",
|
|
106
|
+
name2="J. Smith",
|
|
107
|
+
entity_type="Person"
|
|
108
|
+
)
|
|
109
|
+
```
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
# Default stage order
|
|
113
|
+
DEFAULT_STAGE_ORDER = [
|
|
114
|
+
MatchStage.EXACT,
|
|
115
|
+
MatchStage.ALIAS,
|
|
116
|
+
MatchStage.ABBREVIATION,
|
|
117
|
+
MatchStage.NORMALIZED,
|
|
118
|
+
MatchStage.SEMANTIC,
|
|
119
|
+
MatchStage.STRING,
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
def __init__(
    self,
    config: Optional[FusionMatchingConfig] = None,
    alias_matcher: Optional[Any] = None,
    abbreviation_expander: Optional[Any] = None,
    name_normalizer: Optional[Any] = None,
    semantic_matcher: Optional[Any] = None,
    early_exit_threshold: float = 0.95,
):
    """
    Set up the pipeline with its configuration and optional collaborators.

    Args:
        config: Matching configuration; a default FusionMatchingConfig is
            created when a falsy value is given
        alias_matcher: Object used by the alias stage (optional)
        abbreviation_expander: Object used by the abbreviation stage (optional)
        name_normalizer: Object used by the normalized-name stage (optional)
        semantic_matcher: Object used by the semantic stage (optional)
        early_exit_threshold: A stage score at or above this value stops
            the pipeline before later stages run
    """
    if config:
        self._config = config
    else:
        self._config = FusionMatchingConfig()

    # Optional stage collaborators; the alias and abbreviation stages score 0.0
    # when their collaborator is None.
    self._alias_matcher = alias_matcher
    self._abbreviation_expander = abbreviation_expander
    self._name_normalizer = name_normalizer
    self._semantic_matcher = semantic_matcher
    self._early_exit_threshold = early_exit_threshold

    # Running statistics: per-stage match counters plus global counters
    self._match_counts: Dict[MatchStage, int] = dict.fromkeys(MatchStage, 0)
    self._early_exit_count = 0
    self._total_comparisons = 0
|
|
153
|
+
|
|
154
|
+
@property
def config(self) -> FusionMatchingConfig:
    """Return the matching configuration currently held by the pipeline."""
    return self._config
|
|
158
|
+
|
|
159
|
+
def set_config(self, config: FusionMatchingConfig) -> None:
    """Replace the pipeline's matching configuration with ``config``."""
    self._config = config
|
|
162
|
+
|
|
163
|
+
def set_alias_matcher(self, matcher: Any) -> None:
|
|
164
|
+
"""Set alias matcher instance."""
|
|
165
|
+
self._alias_matcher = matcher
|
|
166
|
+
|
|
167
|
+
def set_abbreviation_expander(self, expander: Any) -> None:
|
|
168
|
+
"""Set abbreviation expander instance."""
|
|
169
|
+
self._abbreviation_expander = expander
|
|
170
|
+
|
|
171
|
+
def set_name_normalizer(self, normalizer: Any) -> None:
|
|
172
|
+
"""Set name normalizer instance."""
|
|
173
|
+
self._name_normalizer = normalizer
|
|
174
|
+
|
|
175
|
+
def set_semantic_matcher(self, matcher: Any) -> None:
|
|
176
|
+
"""Set semantic matcher instance."""
|
|
177
|
+
self._semantic_matcher = matcher
|
|
178
|
+
|
|
179
|
+
async def compute_similarity(
    self,
    name1: str,
    name2: str,
    entity_type: Optional[str] = None,
    threshold: Optional[float] = None,
) -> PipelineResult:
    """
    Compute similarity between two names using the matching pipeline.

    Stages run in DEFAULT_STAGE_ORDER, restricted to the stages enabled
    by the per-entity-type configuration. The pipeline stops as soon as
    a stage reports a score at or above the early-exit threshold.

    Args:
        name1: First name to compare
        name2: Second name to compare
        entity_type: Entity type for per-type configuration; "_default"
            is used for the lookup when omitted or empty
        threshold: Override threshold for match determination. Only
            ``None`` falls back to the configured semantic threshold;
            an explicit ``0.0`` is honored.

    Returns:
        PipelineResult whose ``final_score`` is the highest score seen
        across the executed stages. ``matched_stage`` is the stage that
        produced that highest score; it is set whenever some stage
        scored above 0.0, even if the score is below the threshold.
    """
    self._total_comparisons += 1

    # Get effective config for entity type
    type_config = self._config.get_config_for_type(entity_type or "_default")

    # Explicit None check: `threshold or default` would discard a
    # caller-supplied threshold of 0.0 because 0.0 is falsy.
    if threshold is not None:
        effective_threshold = threshold
    else:
        effective_threshold = self._config.semantic_threshold

    # Get enabled stages for this entity type
    enabled_stages = self._get_enabled_stages(type_config)

    # Execute pipeline
    stage_results: List[MatchResult] = []
    best_score = 0.0
    matched_stage: Optional[MatchStage] = None
    early_exit = False

    for stage in self.DEFAULT_STAGE_ORDER:
        if stage not in enabled_stages:
            continue

        result = await self._execute_stage(stage, name1, name2, type_config)
        stage_results.append(result)

        # Track the best-scoring stage; ties keep the earlier stage
        if result.score > best_score:
            best_score = result.score
            matched_stage = stage

        # Check for early exit on high-confidence match
        if result.score >= self._early_exit_threshold:
            self._early_exit_count += 1
            early_exit = True
            logger.debug(
                "Early exit at stage %s with score %.3f",
                stage.value,
                result.score,
            )
            break

    # Determine if this is a match
    is_match = best_score >= effective_threshold
    if is_match and matched_stage:
        self._match_counts[matched_stage] += 1

    return PipelineResult(
        final_score=best_score,
        is_match=is_match,
        matched_stage=matched_stage,
        stage_results=stage_results,
        early_exit=early_exit,
    )
|
|
252
|
+
|
|
253
|
+
def _get_enabled_stages(
    self, type_config: EntityTypeConfig
) -> List[MatchStage]:
    """Return the stages of DEFAULT_STAGE_ORDER that ``type_config`` enables, in order."""
    return [
        candidate
        for candidate in self.DEFAULT_STAGE_ORDER
        if type_config.is_stage_enabled(candidate.value)
    ]
|
|
262
|
+
|
|
263
|
+
async def _execute_stage(
    self,
    stage: MatchStage,
    name1: str,
    name2: str,
    type_config: EntityTypeConfig,
) -> MatchResult:
    """
    Run the handler for ``stage`` and return its result.

    Any exception raised by a handler is logged as a warning and turned
    into a zero-score MatchResult carrying the error text in ``details``.
    An unrecognized stage also yields a zero-score result.
    """
    try:
        # Synchronous handlers are returned directly; async ones are awaited.
        if stage == MatchStage.EXACT:
            return self._exact_match(name1, name2)
        if stage == MatchStage.ALIAS:
            return await self._alias_match(name1, name2, type_config)
        if stage == MatchStage.ABBREVIATION:
            return self._abbreviation_match(name1, name2, type_config)
        if stage == MatchStage.NORMALIZED:
            return self._normalized_match(name1, name2, type_config)
        if stage == MatchStage.SEMANTIC:
            return await self._semantic_match(name1, name2, type_config)
        if stage == MatchStage.STRING:
            return self._string_similarity(name1, name2, type_config)
        return MatchResult(score=0.0, stage=stage)
    except Exception as e:
        # Best-effort: a failing stage must not abort the whole pipeline
        logger.warning(f"Error in stage {stage.value}: {e}")
        failure_details = {"error": str(e)}
        return MatchResult(score=0.0, stage=stage, details=failure_details)
|
|
289
|
+
|
|
290
|
+
def _exact_match(self, name1: str, name2: str) -> MatchResult:
    """
    Compare two names after lowercasing and collapsing whitespace runs.

    Returns a 1.0-score match (with the shared normalized name in
    ``details``) when the cleaned names are equal, otherwise a 0.0 result.
    """
    cleaned_first, cleaned_second = (
        " ".join(raw.lower().split()) for raw in (name1, name2)
    )

    if cleaned_first != cleaned_second:
        return MatchResult(score=0.0, stage=MatchStage.EXACT)

    return MatchResult(
        score=1.0,
        stage=MatchStage.EXACT,
        is_match=True,
        details={"normalized_name": cleaned_first},
    )
|
|
303
|
+
|
|
304
|
+
async def _alias_match(
    self, name1: str, name2: str, type_config: EntityTypeConfig
) -> MatchResult:
    """
    Check whether both names resolve to the same entity via the alias matcher.

    Returns a 0.0 result when no alias matcher is configured, when either
    lookup is falsy, or when the two lookups carry different ``entity_id``
    values. Otherwise the score is the ``alias_match_score`` threshold
    from ``type_config``, with the global config value passed as fallback.
    """
    if self._alias_matcher is None:
        return MatchResult(score=0.0, stage=MatchStage.ALIAS)

    # Resolve each name separately, first name first
    first_hit = await self._alias_matcher.lookup(name1)
    second_hit = await self._alias_matcher.lookup(name2)

    same_entity = (
        first_hit and second_hit and first_hit.entity_id == second_hit.entity_id
    )
    if not same_entity:
        return MatchResult(score=0.0, stage=MatchStage.ALIAS)

    alias_score = type_config.get_threshold(
        "alias_match_score", self._config.alias_match_score
    )
    return MatchResult(
        score=alias_score,
        stage=MatchStage.ALIAS,
        is_match=True,
        details={"entity_id": first_hit.entity_id},
    )
|
|
328
|
+
|
|
329
|
+
def _abbreviation_match(
    self, name1: str, name2: str, type_config: EntityTypeConfig
) -> MatchResult:
    """
    Decide whether the two names match through abbreviation expansion.

    Args:
        name1: First name
        name2: Second name
        type_config: Per-entity-type configuration; may override the
            pipeline-wide ``abbreviation_match_score``

    Returns:
        MatchResult for the ABBREVIATION stage. A match carries the
        configured abbreviation score; otherwise (including when no
        abbreviation expander is configured) the score is 0.0.
    """
    expander = self._abbreviation_expander

    # No expander configured, or the expander does not relate the names.
    if expander is None or not expander.matches(name1, name2):
        return MatchResult(score=0.0, stage=MatchStage.ABBREVIATION)

    abbreviation_score = type_config.get_threshold(
        "abbreviation_match_score", self._config.abbreviation_match_score
    )
    return MatchResult(
        score=abbreviation_score,
        stage=MatchStage.ABBREVIATION,
        is_match=True,
        details={"abbreviation_match": True},
    )
|
|
349
|
+
|
|
350
|
+
def _normalized_match(
    self, name1: str, name2: str, type_config: EntityTypeConfig
) -> MatchResult:
    """
    Compare the two names after running them through the name normalizer.

    Two checks are made, in order:

    1. The normalized forms are identical -> full normalization score.
    2. The normalizer reports the names match once initials are taken
       into account -> 95% of the normalization score.

    Args:
        name1: First name
        name2: Second name
        type_config: Per-entity-type configuration; may override the
            pipeline-wide ``normalization_match_score``

    Returns:
        MatchResult for the NORMALIZED stage; score 0.0 when neither
        check succeeds or when no name normalizer is configured.
    """
    normalizer = self._name_normalizer
    if normalizer is None:
        return MatchResult(score=0.0, stage=MatchStage.NORMALIZED)

    first = normalizer.normalize(name1)
    second = normalizer.normalize(name2)

    # Identical normalized forms: strongest signal this stage can give.
    if first.normalized == second.normalized:
        return MatchResult(
            score=type_config.get_threshold(
                "normalization_match_score",
                self._config.normalization_match_score,
            ),
            stage=MatchStage.NORMALIZED,
            is_match=True,
            details={
                "normalized1": first.normalized,
                "normalized2": second.normalized,
            },
        )

    if not normalizer.names_match_with_initials(name1, name2):
        return MatchResult(score=0.0, stage=MatchStage.NORMALIZED)

    base_score = type_config.get_threshold(
        "normalization_match_score", self._config.normalization_match_score
    )
    return MatchResult(
        score=base_score * 0.95,  # Slightly lower for initial matches
        stage=MatchStage.NORMALIZED,
        is_match=True,
        details={"initial_match": True},
    )
|
|
388
|
+
|
|
389
|
+
async def _semantic_match(
    self, name1: str, name2: str, type_config: EntityTypeConfig
) -> MatchResult:
    """
    Score the two names with the semantic matcher.

    Args:
        name1: First name
        name2: Second name
        type_config: Per-entity-type configuration; controls whether
            semantic matching is enabled and may override the
            pipeline-wide ``semantic_threshold``

    Returns:
        MatchResult for the SEMANTIC stage. The score and match flag are
        taken from the semantic matcher's result, and the threshold used
        is recorded in ``details``. Score is 0.0 when no semantic matcher
        is configured or the entity type has semantic matching disabled.
    """
    # Stage is skipped entirely when unavailable or switched off for this type.
    if self._semantic_matcher is None or not type_config.semantic_enabled:
        return MatchResult(score=0.0, stage=MatchStage.SEMANTIC)

    cutoff = type_config.get_threshold(
        "semantic_threshold", self._config.semantic_threshold
    )

    outcome = await self._semantic_matcher.match(name1, name2, threshold=cutoff)

    return MatchResult(
        score=outcome.similarity,
        stage=MatchStage.SEMANTIC,
        is_match=outcome.is_match,
        details={"threshold": cutoff},
    )
|
|
413
|
+
|
|
414
|
+
def _string_similarity(
    self, name1: str, name2: str, type_config: EntityTypeConfig
) -> MatchResult:
    """
    Compute string similarity as fallback.

    Names are lower-cased and stripped, then scored as follows:

    1. If both names are non-empty and one contains the other, the
       result is a match with a fixed score of 0.90.
    2. Otherwise the score is the larger of the ``SequenceMatcher`` ratio
       and a 70/30 blend of that ratio with the Jaccard overlap of the
       whitespace-separated tokens. The result is a match when the score
       reaches the configured ``string_similarity_threshold``.

    Args:
        name1: First name
        name2: Second name
        type_config: Per-entity-type configuration; may override the
            pipeline-wide ``string_similarity_threshold``

    Returns:
        MatchResult for the STRING stage.
    """
    # Normalize strings
    n1 = name1.lower().strip()
    n2 = name2.lower().strip()

    # One is substring of other. Both sides must be non-empty: the empty
    # string is a substring of every string, so without this guard a blank
    # name would "match" any other name with score 0.90.
    if n1 and n2 and (n1 in n2 or n2 in n1):
        return MatchResult(
            score=0.90,
            stage=MatchStage.STRING,
            is_match=True,
            details={"substring_match": True},
        )

    # Sequence matcher
    seq_similarity = SequenceMatcher(None, n1, n2).ratio()

    # Token overlap (for multi-word names): Jaccard index of the token sets
    tokens1 = set(n1.split())
    tokens2 = set(n2.split())
    if tokens1 and tokens2:
        token_overlap = len(tokens1 & tokens2) / len(tokens1 | tokens2)
    else:
        token_overlap = 0.0

    # Combine scores; token overlap can only raise the score, never lower it
    final_score = max(seq_similarity, 0.7 * seq_similarity + 0.3 * token_overlap)

    threshold = type_config.get_threshold(
        "string_similarity_threshold", self._config.string_similarity_threshold
    )

    return MatchResult(
        score=final_score,
        stage=MatchStage.STRING,
        is_match=final_score >= threshold,
        details={
            "seq_similarity": seq_similarity,
            "token_overlap": token_overlap,
        },
    )
|
|
458
|
+
|
|
459
|
+
def get_stats(self) -> Dict[str, Any]:
    """
    Report the counters collected while running the pipeline.

    Returns:
        Dictionary with ``total_comparisons``, ``early_exit_count``,
        ``early_exit_rate`` (0.0 when no comparisons have been recorded)
        and ``match_counts`` keyed by each stage's ``value``.
    """
    comparisons = self._total_comparisons
    early_exits = self._early_exit_count

    # Avoid dividing by zero before any comparison has been recorded.
    if comparisons > 0:
        exit_rate = early_exits / comparisons
    else:
        exit_rate = 0.0

    per_stage = {}
    for stage, count in self._match_counts.items():
        per_stage[stage.value] = count

    return {
        "total_comparisons": comparisons,
        "early_exit_count": early_exits,
        "early_exit_rate": exit_rate,
        "match_counts": per_stage,
    }
|
|
474
|
+
|
|
475
|
+
def reset_stats(self) -> None:
    """Zero the comparison, early-exit and per-stage match counters."""
    self._total_comparisons = 0
    self._early_exit_count = 0
    # One zeroed counter per member of MatchStage.
    self._match_counts = dict.fromkeys(MatchStage, 0)
|
|
480
|
+
|
|
481
|
+
def compute_similarity_sync(
    self,
    name1: str,
    name2: str,
    entity_type: Optional[str] = None,
) -> float:
    """
    Synchronous similarity computation (no async matchers).

    Useful for quick similarity checks without async overhead. Checks are
    tried in this order and the first one that hits decides the score:

    1. Exact match (case-insensitive, whitespace-collapsed) -> 1.0
    2. Normalized match, only if a name normalizer is configured ->
       ``normalization_match_score`` (x0.95 for initial-based matches)
    3. Abbreviation match, only if an abbreviation expander is
       configured -> ``abbreviation_match_score``
    4. String similarity fallback: 0.90 when one non-empty name contains
       the other, otherwise the larger of the ``SequenceMatcher`` ratio
       and a 70/30 blend of that ratio with the token Jaccard overlap

    Per-entity-type threshold overrides are not consulted here; scores
    come straight from the pipeline configuration.

    Args:
        name1: First name
        name2: Second name
        entity_type: Entity type (unused, for API consistency)

    Returns:
        Similarity score (0.0 to 1.0)
    """
    # Check exact match first
    n1 = " ".join(name1.lower().split())
    n2 = " ".join(name2.lower().split())

    if n1 == n2:
        return 1.0

    # Normalized match (if normalizer available)
    if self._name_normalizer:
        result1 = self._name_normalizer.normalize(name1)
        result2 = self._name_normalizer.normalize(name2)
        if result1.normalized == result2.normalized:
            return self._config.normalization_match_score
        if self._name_normalizer.names_match_with_initials(name1, name2):
            return self._config.normalization_match_score * 0.95

    # Abbreviation match (if expander available)
    if self._abbreviation_expander:
        if self._abbreviation_expander.matches(name1, name2):
            return self._config.abbreviation_match_score

    # String similarity fallback. Both sides must be non-empty for the
    # substring shortcut: the empty string is a substring of every string,
    # so a blank name would otherwise score 0.90 against any other name.
    if n1 and n2 and (n1 in n2 or n2 in n1):
        return 0.90

    seq_similarity = SequenceMatcher(None, n1, n2).ratio()
    tokens1 = set(n1.split())
    tokens2 = set(n2.split())
    if tokens1 and tokens2:
        token_overlap = len(tokens1 & tokens2) / len(tokens1 | tokens2)
    else:
        token_overlap = 0.0

    return max(seq_similarity, 0.7 * seq_similarity + 0.3 * token_overlap)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pattern Matching Module
|
|
3
|
+
|
|
4
|
+
Provides graph pattern matching capabilities for custom query execution.
|
|
5
|
+
|
|
6
|
+
Phase: 3.3 - Full Custom Query Execution
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from aiecs.application.knowledge_graph.pattern_matching.pattern_matcher import (
|
|
10
|
+
PatternMatcher,
|
|
11
|
+
PatternMatch,
|
|
12
|
+
)
|
|
13
|
+
from aiecs.application.knowledge_graph.pattern_matching.query_executor import (
|
|
14
|
+
CustomQueryExecutor,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"PatternMatcher",
|
|
19
|
+
"PatternMatch",
|
|
20
|
+
"CustomQueryExecutor",
|
|
21
|
+
]
|