aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,553 @@
1
+ """
2
+ Reranking Strategy Implementations
3
+
4
+ Concrete implementations of reranking strategies for different signals:
5
+ - Text similarity (BM25, Jaccard)
6
+ - Semantic similarity (vector embeddings)
7
+ - Structural importance (PageRank, centrality)
8
+ - Hybrid combination
9
+ """
10
+
11
+ from typing import List, Optional, Dict
12
+ import numpy as np
13
+
14
+ from aiecs.application.knowledge_graph.search.reranker import RerankerStrategy
15
+ from aiecs.application.knowledge_graph.search.text_similarity import (
16
+ BM25Scorer,
17
+ jaccard_similarity_text,
18
+ cosine_similarity_text,
19
+ )
20
+ from aiecs.domain.knowledge_graph.models.entity import Entity
21
+ from aiecs.infrastructure.graph_storage.base import GraphStore
22
+
23
+
24
class TextSimilarityReranker(RerankerStrategy):
    """
    Text similarity reranker using BM25 and Jaccard similarity

    Scores each entity by a weighted sum of a min-max normalized BM25 score
    and a Jaccard score, both computed between the query and the text
    extracted from the entity's properties.

    Example:
        ```python
        reranker = TextSimilarityReranker(
            bm25_weight=0.7,
            jaccard_weight=0.3
        )
        scores = await reranker.score("machine learning", entities)
        ```
    """

    def __init__(
        self,
        bm25_weight: float = 0.7,
        jaccard_weight: float = 0.3,
        property_keys: Optional[List[str]] = None,
    ):
        """
        Initialize TextSimilarityReranker

        Args:
            bm25_weight: Weight for BM25 scores (0.0-1.0)
            jaccard_weight: Weight for Jaccard scores (0.0-1.0)
            property_keys: Optional list of property keys to search
                (default: all string properties). An empty list behaves
                like None.

        Raises:
            ValueError: If the two weights do not sum to 1.0 (within 1e-6).
        """
        if abs(bm25_weight + jaccard_weight - 1.0) > 1e-6:
            raise ValueError("bm25_weight + jaccard_weight must equal 1.0")

        self.bm25_weight = bm25_weight
        self.jaccard_weight = jaccard_weight
        self.property_keys = property_keys

    @property
    def name(self) -> str:
        """Identifier of this strategy."""
        return "text_similarity"

    def _extract_text(self, entity: Entity) -> str:
        """
        Extract searchable text from entity properties

        String values are used as-is; for list/tuple values only their
        string items are used. Everything else is ignored.

        Args:
            entity: Entity whose ``properties`` mapping is read

        Returns:
            The collected strings joined with single spaces ("" if none)
        """
        properties = entity.properties

        if self.property_keys:
            # Restrict to the configured keys; missing keys yield None and
            # are skipped by the type checks below.
            values = [properties.get(key) for key in self.property_keys]
        else:
            values = list(properties.values())

        text_parts: List[str] = []
        for value in values:
            if isinstance(value, str):
                text_parts.append(value)
            elif isinstance(value, (list, tuple)):
                text_parts.extend(str(v) for v in value if isinstance(v, str))

        return " ".join(text_parts)

    async def score(self, query: str, entities: List[Entity], **kwargs) -> List[float]:
        """
        Compute text similarity scores

        Args:
            query: Query text
            entities: Entities to score
            **kwargs: Additional parameters (ignored)

        Returns:
            One score per entity, in input order. An empty ``entities`` list
            gives ``[]``; an empty query gives all zeros.
        """
        if not entities:
            return []

        if not query:
            return [0.0] * len(entities)

        # Extract text from entities
        entity_texts = [self._extract_text(entity) for entity in entities]

        # BM25 over the candidate texts themselves (the candidates are the
        # corpus). Presumably yields one score per corpus document - TODO
        # confirm against BM25Scorer.
        scorer = BM25Scorer(entity_texts)
        bm25_scores = scorer.score(query)

        # Min-max normalize BM25 scores to [0, 1]
        if bm25_scores:
            min_bm25 = min(bm25_scores)
            max_bm25 = max(bm25_scores)
            if max_bm25 > min_bm25:
                spread = max_bm25 - min_bm25
                bm25_normalized = [(s - min_bm25) / spread for s in bm25_scores]
            else:
                # No spread to rescale (single entity, or all scores equal).
                # A shared positive score counts as a full match; a shared
                # non-positive score means nothing matched and must not be
                # promoted to 1.0.
                flat_score = 1.0 if max_bm25 > 0 else 0.0
                bm25_normalized = [flat_score] * len(bm25_scores)
        else:
            bm25_normalized = [0.0] * len(entities)

        # Compute Jaccard scores
        jaccard_scores = [jaccard_similarity_text(query, text) for text in entity_texts]

        # Weighted combination of both signals
        return [
            self.bm25_weight * bm25 + self.jaccard_weight * jaccard
            for bm25, jaccard in zip(bm25_normalized, jaccard_scores)
        ]
136
+
137
+
138
class SemanticReranker(RerankerStrategy):
    """
    Semantic reranker using vector cosine similarity

    Uses entity embeddings to compute semantic similarity to query embedding.

    Example:
        ```python
        reranker = SemanticReranker()
        scores = await reranker.score(
            query="machine learning",
            entities=entities,
            query_embedding=[0.1, 0.2, ...]
        )
        ```
    """

    def __init__(self):
        """Initialize SemanticReranker"""

    @property
    def name(self) -> str:
        """Identifier of this strategy."""
        return "semantic"

    async def score(
        self,
        query: str,
        entities: List[Entity],
        query_embedding: Optional[List[float]] = None,
        **kwargs,
    ) -> List[float]:
        """
        Compute semantic similarity scores

        The cosine similarity between the query embedding and each entity
        embedding is mapped from [-1, 1] to [0, 1].

        Args:
            query: Query text (not read by this method)
            entities: Entities to score
            query_embedding: Optional query embedding vector
            **kwargs: Additional parameters (ignored)

        Returns:
            One score in [0.0, 1.0] per entity, in input order. The score is
            0.0 when no query embedding is given, when either vector has zero
            norm, when the entity has no embedding, or when the vector
            lengths differ.
        """
        if not entities:
            return []

        if query_embedding is None:
            # No embedding provided, return zero scores
            return [0.0] * len(entities)

        query_vec = np.array(query_embedding, dtype=np.float32)
        query_norm = np.linalg.norm(query_vec)

        if query_norm == 0:
            return [0.0] * len(entities)

        scores: List[float] = []

        for entity in entities:
            embedding = entity.embedding
            # Explicit None/length check: plain truthiness raises ValueError
            # when the embedding is a multi-element NumPy array.
            if embedding is None or len(embedding) == 0:
                scores.append(0.0)
                continue

            entity_vec = np.array(embedding, dtype=np.float32)

            # Dimension mismatch - vectors are not comparable
            if len(query_vec) != len(entity_vec):
                scores.append(0.0)
                continue

            entity_norm = np.linalg.norm(entity_vec)

            if entity_norm == 0:
                scores.append(0.0)
                continue

            # Cosine similarity; clamp because float32 rounding can land a
            # hair outside [-1, 1], which would leak out of the [0, 1] range.
            similarity = np.dot(query_vec, entity_vec) / (query_norm * entity_norm)
            similarity = np.clip(similarity, -1.0, 1.0)

            # Map [-1, 1] to [0, 1]
            scores.append(float((similarity + 1) / 2))

        return scores
222
+
223
+
224
class StructuralReranker(RerankerStrategy):
    """
    Structural reranker using graph centrality and PageRank

    Scores entities by a weighted sum of a normalized personalized-PageRank
    score and a normalized degree (outgoing plus incoming neighbor count).
    The query text plays no part in the score.

    Example:
        ```python
        reranker = StructuralReranker(graph_store)
        scores = await reranker.score("query", entities)
        ```
    """

    def __init__(
        self,
        graph_store: GraphStore,
        pagerank_weight: float = 0.7,
        degree_weight: float = 0.3,
        use_cached_scores: bool = True,
    ):
        """
        Initialize StructuralReranker

        Args:
            graph_store: Graph storage backend
            pagerank_weight: Weight for PageRank scores (0.0-1.0)
            degree_weight: Weight for degree centrality (0.0-1.0)
            use_cached_scores: Whether to cache PageRank scores and degrees
                on this instance

        Raises:
            ValueError: If the two weights do not sum to 1.0 (within 1e-6).
        """
        if abs(pagerank_weight + degree_weight - 1.0) > 1e-6:
            raise ValueError("pagerank_weight + degree_weight must equal 1.0")

        self.graph_store = graph_store
        self.pagerank_weight = pagerank_weight
        self.degree_weight = degree_weight
        self.use_cached_scores = use_cached_scores
        # Per-instance caches keyed by entity id. Nothing in this class ever
        # clears them, so use a fresh instance after the graph changes.
        self._pagerank_cache: Dict[str, float] = {}
        self._degree_cache: Dict[str, int] = {}

    @property
    def name(self) -> str:
        """Identifier of this strategy."""
        return "structural"

    async def _compute_pagerank_scores(self, entity_ids: List[str]) -> Dict[str, float]:
        """
        Compute or retrieve cached PageRank scores

        Args:
            entity_ids: Ids of the entities to score

        Returns:
            Mapping of every requested id to a score normalized by the
            largest score of the run; ids absent from the PageRank result
            map to 0.0.
        """
        # Serve from cache only when every requested id has an entry.
        # Membership (rather than "score > 0") keeps a legitimately cached
        # 0.0 from being mistaken for a cache miss.
        if self.use_cached_scores and all(
            eid in self._pagerank_cache for eid in entity_ids
        ):
            return {eid: self._pagerank_cache[eid] for eid in entity_ids}

        # Imported here rather than at module level - presumably to avoid a
        # circular import; TODO confirm.
        from aiecs.application.knowledge_graph.retrieval.retrieval_strategies import (
            PersonalizedPageRank,
        )

        ppr = PersonalizedPageRank(self.graph_store)

        # Seed with the first (up to) 10 entities the store returns.
        # In practice, you might want to use seed entities from query context
        all_entities = await self.graph_store.get_all_entities()
        seed_ids = [e.id for e in all_entities[:10]]

        if not seed_ids:
            return {eid: 0.0 for eid in entity_ids}

        ppr_results = await ppr.retrieve(
            seed_entity_ids=seed_ids,
            max_results=len(entity_ids) * 2,
            alpha=0.15,
        )

        # Results are iterated as (entity, score) pairs
        pagerank_scores = {entity.id: score for entity, score in ppr_results}

        # Normalize to [0, 1] by the largest score
        if pagerank_scores:
            max_score = max(pagerank_scores.values())
            if max_score > 0:
                pagerank_scores = {
                    eid: score / max_score for eid, score in pagerank_scores.items()
                }

        # Update cache
        if self.use_cached_scores:
            self._pagerank_cache.update(pagerank_scores)

        return {eid: pagerank_scores.get(eid, 0.0) for eid in entity_ids}

    async def _compute_degree_scores(self, entity_ids: List[str]) -> Dict[str, float]:
        """
        Compute degree centrality scores

        The degree of an entity is the number of outgoing neighbors plus the
        number of incoming neighbors reported by the graph store.

        Args:
            entity_ids: Ids of the entities to score

        Returns:
            Mapping of every requested id to its degree divided by the
            largest degree among the requested ids (all 0.0 when no
            requested entity has a neighbor).
        """
        # Start from genuinely cached degrees only. Pre-filling misses with 0
        # would make every id look "already computed", so the store would
        # never be queried and every degree score would be 0.
        degrees: Dict[str, int] = {}
        if self.use_cached_scores:
            degrees = {
                eid: self._degree_cache[eid]
                for eid in entity_ids
                if eid in self._degree_cache
            }

        # Compute missing degrees
        for entity_id in entity_ids:
            if entity_id in degrees:
                continue

            neighbors_out = await self.graph_store.get_neighbors(
                entity_id, direction="outgoing"
            )
            neighbors_in = await self.graph_store.get_neighbors(
                entity_id, direction="incoming"
            )
            degree = len(neighbors_out) + len(neighbors_in)
            degrees[entity_id] = degree
            if self.use_cached_scores:
                self._degree_cache[entity_id] = degree

        # Normalize to [0, 1] by the largest degree in this request
        max_degree = max(degrees.values(), default=0)
        if max_degree > 0:
            return {eid: deg / max_degree for eid, deg in degrees.items()}

        return {eid: 0.0 for eid in entity_ids}

    async def score(self, query: str, entities: List[Entity], **kwargs) -> List[float]:
        """
        Compute structural importance scores

        Args:
            query: Query text (not used, but required by interface)
            entities: Entities to score
            **kwargs: Additional parameters (ignored)

        Returns:
            One score per entity, in input order (``[]`` for no entities)
        """
        if not entities:
            return []

        entity_ids = [entity.id for entity in entities]

        # Compute PageRank scores
        pagerank_scores = await self._compute_pagerank_scores(entity_ids)

        # Compute degree centrality scores
        degree_scores = await self._compute_degree_scores(entity_ids)

        # Weighted combination of both signals
        return [
            self.pagerank_weight * pagerank_scores.get(entity.id, 0.0)
            + self.degree_weight * degree_scores.get(entity.id, 0.0)
            for entity in entities
        ]
375
+
376
+
377
class HybridReranker(RerankerStrategy):
    """
    Hybrid reranker combining multiple signals

    Blends the scores of a TextSimilarityReranker, a SemanticReranker and a
    StructuralReranker into one weighted sum per entity.

    Example:
        ```python
        reranker = HybridReranker(
            graph_store=store,
            text_weight=0.4,
            semantic_weight=0.4,
            structural_weight=0.2
        )
        scores = await reranker.score(
            query="machine learning",
            entities=entities,
            query_embedding=[0.1, 0.2, ...]
        )
        ```
    """

    def __init__(
        self,
        graph_store: GraphStore,
        text_weight: float = 0.4,
        semantic_weight: float = 0.4,
        structural_weight: float = 0.2,
    ):
        """
        Initialize HybridReranker

        Args:
            graph_store: Graph storage backend (handed to the structural
                sub-strategy)
            text_weight: Weight for text similarity (0.0-1.0)
            semantic_weight: Weight for semantic similarity (0.0-1.0)
            structural_weight: Weight for structural importance (0.0-1.0)

        Raises:
            ValueError: If the three weights do not sum to 1.0 (within 1e-6).
        """
        weight_total = text_weight + semantic_weight + structural_weight
        if abs(weight_total - 1.0) > 1e-6:
            raise ValueError("Weights must sum to 1.0")

        self.graph_store = graph_store
        self.text_weight = text_weight
        self.semantic_weight = semantic_weight
        self.structural_weight = structural_weight

        # One sub-strategy per signal, each built with its own defaults
        self.text_reranker = TextSimilarityReranker()
        self.semantic_reranker = SemanticReranker()
        self.structural_reranker = StructuralReranker(graph_store)

    @property
    def name(self) -> str:
        """Identifier of this strategy."""
        return "hybrid"

    async def score(
        self,
        query: str,
        entities: List[Entity],
        query_embedding: Optional[List[float]] = None,
        **kwargs,
    ) -> List[float]:
        """
        Compute hybrid scores combining all signals

        Args:
            query: Query text
            entities: Entities to score
            query_embedding: Optional query embedding vector, forwarded to
                the semantic sub-strategy
            **kwargs: Additional parameters, forwarded to every sub-strategy

        Returns:
            One blended score per entity, in input order (``[]`` for no
            entities)
        """
        if not entities:
            return []

        # The sub-strategies are awaited one after another: text, semantic,
        # structural.
        lexical = await self.text_reranker.score(query, entities, **kwargs)
        embedding_based = await self.semantic_reranker.score(
            query, entities, query_embedding=query_embedding, **kwargs
        )
        graph_based = await self.structural_reranker.score(query, entities, **kwargs)

        # Weighted sum of the three signals, position by position
        blended: List[float] = []
        for text_part, semantic_part, structural_part in zip(
            lexical, embedding_based, graph_based
        ):
            blended.append(
                self.text_weight * text_part
                + self.semantic_weight * semantic_part
                + self.structural_weight * structural_part
            )

        return blended
471
+
472
+
473
class CrossEncoderReranker(RerankerStrategy):
    """
    Cross-encoder reranker using transformer models (optional)

    Intended to score query/entity-text pairs with a cross-encoder model,
    which is more accurate but slower than a bi-encoder.

    Note: This is a placeholder implementation. No model is ever loaded;
    scores come from ``cosine_similarity_text``. For production use,
    integrate with a cross-encoder model library (e.g., sentence-transformers).

    Example:
        ```python
        reranker = CrossEncoderReranker(model_name="cross-encoder/ms-marco-MiniLM-L-6-v2")
        scores = await reranker.score("machine learning", entities)
        ```
    """

    def __init__(self, model_name: Optional[str] = None, use_gpu: bool = False):
        """
        Initialize CrossEncoderReranker

        Args:
            model_name: Optional model name (default: None, uses placeholder)
            use_gpu: Whether to use GPU (if available)
        """
        self.model_name = model_name
        self.use_gpu = use_gpu
        # Slot reserved for a lazily loaded model; stays None in this
        # placeholder implementation.
        self._model = None

    @property
    def name(self) -> str:
        """Identifier of this strategy."""
        return "cross_encoder"

    def _extract_text(self, entity: Entity) -> str:
        """
        Extract text from entity for encoding

        Collects every string property value, plus the string items of
        list/tuple property values, and joins them with single spaces.
        """
        fragments: List[str] = []
        for prop_value in entity.properties.values():
            if isinstance(prop_value, str):
                fragments.append(prop_value)
                continue
            if isinstance(prop_value, (list, tuple)):
                for item in prop_value:
                    if isinstance(item, str):
                        fragments.append(str(item))
        return " ".join(fragments)

    async def score(self, query: str, entities: List[Entity], **kwargs) -> List[float]:
        """
        Compute cross-encoder scores

        Args:
            query: Query text
            entities: Entities to score
            **kwargs: Additional parameters (ignored)

        Returns:
            One score per entity, in input order. An empty ``entities`` list
            gives ``[]``; an empty query gives all zeros.
        """
        if not entities:
            return []

        if not query:
            return [0.0] * len(entities)

        # Placeholder path. A production version would instead:
        #   1. lazily create sentence_transformers.CrossEncoder(
        #          self.model_name or "cross-encoder/ms-marco-MiniLM-L-6-v2")
        #      and keep it in self._model,
        #   2. predict on [query, text] pairs built from _extract_text,
        #   3. min-max normalize the predictions (with a 1e-10 guard in the
        #      denominator) and return them as a list.

        # Fallback: text-level cosine similarity stands in for the model
        texts = list(map(self._extract_text, entities))
        placeholder_scores = []
        for text in texts:
            placeholder_scores.append(cosine_similarity_text(query, text))

        return placeholder_scores