aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,553 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Reranking Strategy Implementations
|
|
3
|
+
|
|
4
|
+
Concrete implementations of reranking strategies for different signals:
|
|
5
|
+
- Text similarity (BM25, Jaccard)
|
|
6
|
+
- Semantic similarity (vector embeddings)
|
|
7
|
+
- Structural importance (PageRank, centrality)
|
|
8
|
+
- Hybrid combination
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import List, Optional, Dict
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
from aiecs.application.knowledge_graph.search.reranker import RerankerStrategy
|
|
15
|
+
from aiecs.application.knowledge_graph.search.text_similarity import (
|
|
16
|
+
BM25Scorer,
|
|
17
|
+
jaccard_similarity_text,
|
|
18
|
+
cosine_similarity_text,
|
|
19
|
+
)
|
|
20
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
21
|
+
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TextSimilarityReranker(RerankerStrategy):
    """
    Text similarity reranker using BM25 and Jaccard similarity

    Combines BM25 (term-based relevance) and Jaccard (set overlap) scores
    to rerank entities based on text similarity to query.

    Example:
        ```python
        reranker = TextSimilarityReranker(
            bm25_weight=0.7,
            jaccard_weight=0.3
        )
        scores = await reranker.score("machine learning", entities)
        ```
    """

    def __init__(
        self,
        bm25_weight: float = 0.7,
        jaccard_weight: float = 0.3,
        property_keys: Optional[List[str]] = None,
    ):
        """
        Initialize TextSimilarityReranker

        Args:
            bm25_weight: Weight for BM25 scores (0.0-1.0)
            jaccard_weight: Weight for Jaccard scores (0.0-1.0)
            property_keys: Optional list of property keys to search
                (default: all string properties)

        Raises:
            ValueError: If the two weights do not sum to 1.0
                (within a tolerance of 1e-6)
        """
        if abs(bm25_weight + jaccard_weight - 1.0) > 1e-6:
            raise ValueError("bm25_weight + jaccard_weight must equal 1.0")

        self.bm25_weight = bm25_weight
        self.jaccard_weight = jaccard_weight
        self.property_keys = property_keys

    @property
    def name(self) -> str:
        """Identifier of this strategy"""
        return "text_similarity"

    def _extract_text(self, entity: Entity) -> str:
        """
        Extract searchable text from entity properties

        String values are used as-is; for list/tuple values only the string
        items are used. Everything else is ignored. The parts are joined
        with single spaces.
        """
        if self.property_keys:
            # Use specified properties only (missing keys yield None and
            # are skipped by the type checks below)
            values = [entity.properties.get(key) for key in self.property_keys]
        else:
            # Use all string properties
            values = list(entity.properties.values())

        text_parts: List[str] = []
        for value in values:
            if isinstance(value, str):
                text_parts.append(value)
            elif isinstance(value, (list, tuple)):
                text_parts.extend(str(v) for v in value if isinstance(v, str))

        return " ".join(text_parts)

    @staticmethod
    def _normalize_bm25(bm25_scores: List[float], count: int) -> List[float]:
        """
        Min-max normalize raw BM25 scores to [0, 1]

        Args:
            bm25_scores: Raw scores, one per entity
            count: Number of entities (used when no scores were produced)

        Returns:
            Normalized scores
        """
        if not bm25_scores:
            return [0.0] * count

        lowest = min(bm25_scores)
        highest = max(bm25_scores)
        if highest > lowest:
            spread = highest - lowest
            return [(s - lowest) / spread for s in bm25_scores]

        # All scores are identical, so min-max scaling is undefined.
        # A common non-zero score is treated as full relevance, but an
        # all-zero result must stay 0.0 so that non-matching entities are
        # not credited with bm25_weight.
        uniform = 1.0 if highest != 0 else 0.0
        return [uniform] * len(bm25_scores)

    async def score(self, query: str, entities: List[Entity], **kwargs) -> List[float]:
        """
        Compute text similarity scores

        Args:
            query: Query text
            entities: Entities to score
            **kwargs: Additional parameters (ignored)

        Returns:
            List of scores (0.0-1.0), one per entity, in input order
        """
        if not entities:
            return []

        if not query:
            return [0.0] * len(entities)

        # Extract text from entities
        entity_texts = [self._extract_text(entity) for entity in entities]

        # Compute BM25 scores, using the candidate entities as the corpus
        bm25_scores = BM25Scorer(entity_texts).score(query)
        bm25_normalized = self._normalize_bm25(bm25_scores, len(entities))

        # Compute Jaccard scores
        jaccard_scores = [jaccard_similarity_text(query, text) for text in entity_texts]

        # Combine scores
        return [
            self.bm25_weight * bm25 + self.jaccard_weight * jaccard
            for bm25, jaccard in zip(bm25_normalized, jaccard_scores)
        ]
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class SemanticReranker(RerankerStrategy):
    """
    Semantic reranker using vector cosine similarity

    Uses entity embeddings to compute semantic similarity to query embedding.

    Example:
        ```python
        reranker = SemanticReranker()
        scores = await reranker.score(
            query="machine learning",
            entities=entities,
            query_embedding=[0.1, 0.2, ...]
        )
        ```
    """

    def __init__(self):
        """Initialize SemanticReranker"""

    @property
    def name(self) -> str:
        """Identifier of this strategy"""
        return "semantic"

    async def score(
        self,
        query: str,
        entities: List[Entity],
        query_embedding: Optional[List[float]] = None,
        **kwargs,
    ) -> List[float]:
        """
        Compute semantic similarity scores

        Args:
            query: Query text (not used; scoring relies on the embedding only)
            entities: Entities to score
            query_embedding: Optional query embedding vector. When it is
                None or has zero norm, every entity scores 0.0.
            **kwargs: Additional parameters (ignored)

        Returns:
            List of scores (0.0-1.0), one per entity, in input order.
            Entities with a missing, zero-norm, wrong-sized or non-finite
            embedding score 0.0.
        """
        if not entities:
            return []

        if query_embedding is None:
            # No embedding provided, return zero scores
            return [0.0] * len(entities)

        query_vec = np.array(query_embedding, dtype=np.float32)
        query_norm = np.linalg.norm(query_vec)

        if query_norm == 0:
            return [0.0] * len(entities)

        scores: List[float] = []

        for entity in entities:
            embedding = entity.embedding
            # Explicit None/length test: plain truthiness raises on
            # array-like embeddings with more than one element.
            if embedding is None or len(embedding) == 0:
                scores.append(0.0)
                continue

            entity_vec = np.array(embedding, dtype=np.float32)

            # Check dimension compatibility
            if len(query_vec) != len(entity_vec):
                # Dimension mismatch - return zero score
                scores.append(0.0)
                continue

            entity_norm = np.linalg.norm(entity_vec)

            if entity_norm == 0:
                scores.append(0.0)
                continue

            # Cosine similarity
            similarity = float(np.dot(query_vec, entity_vec) / (query_norm * entity_norm))

            if not np.isfinite(similarity):
                # NaN/inf components in either vector: no usable signal
                scores.append(0.0)
                continue

            # float32 rounding can push the ratio slightly outside [-1, 1];
            # clamp so the result honours the documented 0.0-1.0 range.
            similarity = max(-1.0, min(1.0, similarity))

            # Normalize to [0, 1] range
            scores.append((similarity + 1.0) / 2.0)

        return scores
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class StructuralReranker(RerankerStrategy):
    """
    Structural reranker using graph centrality and PageRank

    Scores entities based on their structural importance in the graph.
    Uses PageRank scores and degree centrality.

    Example:
        ```python
        reranker = StructuralReranker(graph_store)
        scores = await reranker.score("query", entities)
        ```
    """

    def __init__(
        self,
        graph_store: GraphStore,
        pagerank_weight: float = 0.7,
        degree_weight: float = 0.3,
        use_cached_scores: bool = True,
    ):
        """
        Initialize StructuralReranker

        Args:
            graph_store: Graph storage backend
            pagerank_weight: Weight for PageRank scores (0.0-1.0)
            degree_weight: Weight for degree centrality (0.0-1.0)
            use_cached_scores: Whether to reuse PageRank scores and raw
                degrees computed by earlier calls on this instance

        Raises:
            ValueError: If the two weights do not sum to 1.0
                (within a tolerance of 1e-6)
        """
        if abs(pagerank_weight + degree_weight - 1.0) > 1e-6:
            raise ValueError("pagerank_weight + degree_weight must equal 1.0")

        self.graph_store = graph_store
        self.pagerank_weight = pagerank_weight
        self.degree_weight = degree_weight
        self.use_cached_scores = use_cached_scores
        # entity id -> normalized PageRank score
        self._pagerank_cache: Dict[str, float] = {}
        # entity id -> raw degree (outgoing + incoming neighbor count)
        self._degree_cache: Dict[str, int] = {}

    @property
    def name(self) -> str:
        """Identifier of this strategy"""
        return "structural"

    async def _compute_pagerank_scores(self, entity_ids: List[str]) -> Dict[str, float]:
        """
        Compute or retrieve cached PageRank scores

        Args:
            entity_ids: IDs of the entities to score

        Returns:
            Mapping of entity id to a score normalized by the largest score
            in the retrieved result set; ids absent from that set map to 0.0
        """
        # Check cache first. The cache is used only when every requested id
        # has a strictly positive cached score; a missing or zero entry
        # triggers a full recomputation.
        if self.use_cached_scores:
            cached = {eid: self._pagerank_cache.get(eid, 0.0) for eid in entity_ids}
            if all(score > 0 for score in cached.values()):
                return cached

        # Compute PageRank using PersonalizedPageRank
        # (imported here rather than at module level)
        from aiecs.application.knowledge_graph.retrieval.retrieval_strategies import (
            PersonalizedPageRank,
        )

        ppr = PersonalizedPageRank(self.graph_store)

        # Seeds are the first (at most 10) entities returned by the store,
        # not the whole graph.
        # In practice, you might want to use seed entities from query context
        all_entities = await self.graph_store.get_all_entities()
        seed_ids = [e.id for e in all_entities[:10]]

        if not seed_ids:
            return {eid: 0.0 for eid in entity_ids}

        ppr_results = await ppr.retrieve(
            seed_entity_ids=seed_ids,
            max_results=len(entity_ids) * 2,
            alpha=0.15,
        )

        # Create score dictionary
        pagerank_scores = {entity.id: score for entity, score in ppr_results}

        # Normalize to [0, 1]
        if pagerank_scores:
            max_score = max(pagerank_scores.values())
            if max_score > 0:
                pagerank_scores = {eid: score / max_score for eid, score in pagerank_scores.items()}

        # Update cache
        if self.use_cached_scores:
            self._pagerank_cache.update(pagerank_scores)

        return {eid: pagerank_scores.get(eid, 0.0) for eid in entity_ids}

    async def _compute_degree_scores(self, entity_ids: List[str]) -> Dict[str, float]:
        """
        Compute degree centrality scores

        The degree of an entity is the number of outgoing neighbors plus the
        number of incoming neighbors reported by the graph store.

        Args:
            entity_ids: IDs of the entities to score

        Returns:
            Mapping of entity id to its degree divided by the largest degree
            among the requested entities (all 0.0 when that maximum is 0)
        """
        # Start from genuinely cached degrees only. Filling in a default for
        # uncached ids would make every id look "already computed" and the
        # store would never be queried.
        degrees: Dict[str, int] = {}
        if self.use_cached_scores:
            degrees = {
                eid: self._degree_cache[eid]
                for eid in entity_ids
                if eid in self._degree_cache
            }

        # Compute missing degrees
        for entity_id in entity_ids:
            if entity_id in degrees:
                continue

            neighbors_out = await self.graph_store.get_neighbors(
                entity_id, direction="outgoing"
            )
            neighbors_in = await self.graph_store.get_neighbors(entity_id, direction="incoming")
            degree = len(neighbors_out) + len(neighbors_in)
            degrees[entity_id] = degree

            if self.use_cached_scores:
                self._degree_cache[entity_id] = degree

        # Normalize to [0, 1]
        if degrees:
            max_degree = max(degrees.values())
            if max_degree > 0:
                return {eid: deg / max_degree for eid, deg in degrees.items()}

        return {eid: 0.0 for eid in entity_ids}

    async def score(self, query: str, entities: List[Entity], **kwargs) -> List[float]:
        """
        Compute structural importance scores

        Args:
            query: Query text (not used, but required by interface)
            entities: Entities to score
            **kwargs: Additional parameters (ignored)

        Returns:
            List of scores (0.0-1.0), one per entity, in input order
        """
        if not entities:
            return []

        entity_ids = [entity.id for entity in entities]

        # Compute PageRank scores
        pagerank_scores = await self._compute_pagerank_scores(entity_ids)

        # Compute degree centrality scores
        degree_scores = await self._compute_degree_scores(entity_ids)

        # Combine scores
        return [
            self.pagerank_weight * pagerank_scores.get(entity.id, 0.0)
            + self.degree_weight * degree_scores.get(entity.id, 0.0)
            for entity in entities
        ]
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
class HybridReranker(RerankerStrategy):
    """
    Hybrid reranker that blends several relevance signals

    Produces one score per entity as a weighted sum of text similarity,
    semantic similarity, and structural importance.

    Example:
        ```python
        reranker = HybridReranker(
            graph_store=store,
            text_weight=0.4,
            semantic_weight=0.4,
            structural_weight=0.2
        )
        scores = await reranker.score(
            query="machine learning",
            entities=entities,
            query_embedding=[0.1, 0.2, ...]
        )
        ```
    """

    def __init__(
        self,
        graph_store: GraphStore,
        text_weight: float = 0.4,
        semantic_weight: float = 0.4,
        structural_weight: float = 0.2,
    ):
        """
        Initialize HybridReranker

        Args:
            graph_store: Graph storage backend, handed to the structural
                sub-strategy
            text_weight: Share of the text similarity signal (0.0-1.0)
            semantic_weight: Share of the semantic similarity signal (0.0-1.0)
            structural_weight: Share of the structural signal (0.0-1.0)

        Raises:
            ValueError: If the three weights do not sum to 1.0
                (within a tolerance of 1e-6)
        """
        weight_total = text_weight + semantic_weight + structural_weight
        if abs(weight_total - 1.0) > 1e-6:
            raise ValueError("Weights must sum to 1.0")

        # One sub-strategy per signal, each with its default configuration
        self.text_reranker = TextSimilarityReranker()
        self.semantic_reranker = SemanticReranker()
        self.structural_reranker = StructuralReranker(graph_store)

        self.graph_store = graph_store
        self.text_weight = text_weight
        self.semantic_weight = semantic_weight
        self.structural_weight = structural_weight

    @property
    def name(self) -> str:
        """Identifier of this strategy"""
        return "hybrid"

    async def score(
        self,
        query: str,
        entities: List[Entity],
        query_embedding: Optional[List[float]] = None,
        **kwargs,
    ) -> List[float]:
        """
        Compute the weighted blend of all three signals

        Args:
            query: Query text
            entities: Entities to score
            query_embedding: Optional query embedding vector, forwarded to
                the semantic sub-strategy
            **kwargs: Additional parameters, forwarded to every sub-strategy

        Returns:
            List of scores (0.0-1.0), one per entity, in input order
        """
        if not entities:
            return []

        # Sub-strategies are awaited one after another: text, semantic,
        # structural.
        from_text = await self.text_reranker.score(query, entities, **kwargs)
        from_semantic = await self.semantic_reranker.score(
            query, entities, query_embedding=query_embedding, **kwargs
        )
        from_structure = await self.structural_reranker.score(query, entities, **kwargs)

        # Weighted sum per entity
        blended: List[float] = []
        for text, semantic, structural in zip(from_text, from_semantic, from_structure):
            blended.append(
                self.text_weight * text
                + self.semantic_weight * semantic
                + self.structural_weight * structural
            )

        return blended
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
class CrossEncoderReranker(RerankerStrategy):
    """
    Cross-encoder reranker using transformer models (optional)

    A cross-encoder scores each (query, entity text) pair jointly, which is
    more accurate but slower than a bi-encoder.

    Note: This is a placeholder implementation. No model is loaded; scoring
    falls back to `cosine_similarity_text`. For production use, integrate
    with a cross-encoder model library (e.g., sentence-transformers).

    Example:
    ```python
    reranker = CrossEncoderReranker(model_name="cross-encoder/ms-marco-MiniLM-L-6-v2")
    scores = await reranker.score("machine learning", entities)
    ```
    """

    def __init__(self, model_name: Optional[str] = None, use_gpu: bool = False):
        """
        Initialize CrossEncoderReranker

        Args:
            model_name: Optional model name (default: None, uses placeholder)
            use_gpu: Whether to use GPU (if available)
        """
        # These settings are stored for a future model integration; the
        # placeholder scoring path below does not read them.
        self.model_name = model_name
        self.use_gpu = use_gpu
        self._model = None

    @property
    def name(self) -> str:
        """Identifier of this strategy."""
        return "cross_encoder"

    def _extract_text(self, entity: Entity) -> str:
        """Join the string-valued properties of an entity into one text.

        Plain string values are taken as-is; for list/tuple values only the
        string items are kept. Everything else is ignored.
        """
        fragments = []
        for value in entity.properties.values():
            if isinstance(value, str):
                fragments.append(value)
            elif isinstance(value, (list, tuple)):
                fragments.extend(str(item) for item in value if isinstance(item, str))
        return " ".join(fragments)

    async def score(self, query: str, entities: List[Entity], **kwargs) -> List[float]:
        """
        Compute cross-encoder scores

        Args:
            query: Query text
            entities: Entities to score
            **kwargs: Additional parameters (unused by the placeholder)

        Returns:
            One score per entity; an empty list for no entities, and all
            zeros when the query is empty. The value range is whatever
            `cosine_similarity_text` yields (presumably 0.0-1.0 -- confirm
            against that helper).
        """
        if not entities:
            return []

        if not query:
            return [0.0] * len(entities)

        # Placeholder implementation.
        # A production version would lazily create a
        # sentence_transformers.CrossEncoder (self.model_name, defaulting to
        # "cross-encoder/ms-marco-MiniLM-L-6-v2"), call predict() on
        # [query, entity_text] pairs, and min-max normalize the raw scores
        # to [0, 1] before returning them as a list.

        # Fallback: text-level similarity between the query and each
        # entity's extracted text.
        texts = [self._extract_text(candidate) for candidate in entities]
        return [cosine_similarity_text(query, text) for text in texts]
|